/* qemu/hw/virtio/virtio-pci.c */
   1/*
   2 * Virtio PCI Bindings
   3 *
   4 * Copyright IBM, Corp. 2007
   5 * Copyright (c) 2009 CodeSourcery
   6 *
   7 * Authors:
   8 *  Anthony Liguori   <aliguori@us.ibm.com>
   9 *  Paul Brook        <paul@codesourcery.com>
  10 *
  11 * This work is licensed under the terms of the GNU GPL, version 2.  See
  12 * the COPYING file in the top-level directory.
  13 *
  14 * Contributions after 2012-01-13 are licensed under the terms of the
  15 * GNU GPL, version 2 or (at your option) any later version.
  16 */
  17
  18#include "qemu/osdep.h"
  19
  20#include "exec/memop.h"
  21#include "standard-headers/linux/virtio_pci.h"
  22#include "hw/virtio/virtio.h"
  23#include "migration/qemu-file-types.h"
  24#include "hw/pci/pci.h"
  25#include "hw/pci/pci_bus.h"
  26#include "hw/qdev-properties.h"
  27#include "qapi/error.h"
  28#include "qemu/error-report.h"
  29#include "qemu/module.h"
  30#include "hw/pci/msi.h"
  31#include "hw/pci/msix.h"
  32#include "hw/loader.h"
  33#include "sysemu/kvm.h"
  34#include "virtio-pci.h"
  35#include "qemu/range.h"
  36#include "hw/virtio/virtio-bus.h"
  37#include "qapi/visitor.h"
  38
/* Size of the legacy I/O BAR region that precedes the device-specific
 * config space; it shrinks/grows depending on whether MSI-X is present
 * (the MSI-X registers sit between the common header and device config).
 */
#define VIRTIO_PCI_REGION_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_present(dev))

/* The VIRTIO_PCI_CONFIG macro from the standard headers keys off MSI-X
 * *presence*; we want *enablement* semantics, so drop it and define our own. */
#undef VIRTIO_PCI_CONFIG

/* The remaining space is defined by each driver as the per-driver
 * configuration space */
#define VIRTIO_PCI_CONFIG_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))

/* Forward declarations for functions defined later in this file. */
static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
                               VirtIOPCIProxy *dev);
static void virtio_pci_reset(DeviceState *qdev);
  51/* virtio device */
  52/* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */
  53static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
  54{
  55    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
  56}
  57
/* DeviceState to VirtIOPCIProxy. Note: used on datapath,
 * be careful and test performance if you change this.
 */
static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
{
    /* Same container_of as to_virtio_pci_proxy(); kept as a separate
     * function so the hot-path variant can be tuned independently. */
    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
}
  65
  66static void virtio_pci_notify(DeviceState *d, uint16_t vector)
  67{
  68    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
  69
  70    if (msix_enabled(&proxy->pci_dev))
  71        msix_notify(&proxy->pci_dev, vector);
  72    else {
  73        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
  74        pci_set_irq(&proxy->pci_dev, atomic_read(&vdev->isr) & 1);
  75    }
  76}
  77
  78static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
  79{
  80    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
  81    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
  82
  83    pci_device_save(&proxy->pci_dev, f);
  84    msix_save(&proxy->pci_dev, f);
  85    if (msix_present(&proxy->pci_dev))
  86        qemu_put_be16(f, vdev->config_vector);
  87}
  88
/* Per-queue migration state for the modern (virtio 1.0) transport:
 * queue size, enable flag, and the split lo/hi halves of the desc,
 * avail and used ring addresses.  Field order is wire format — do not
 * reorder without bumping the version.
 */
static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
    .name = "virtio_pci/modern_queue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(num, VirtIOPCIQueue),
        VMSTATE_UNUSED(1), /* enabled was stored as be16 */
        VMSTATE_BOOL(enabled, VirtIOPCIQueue),
        VMSTATE_UINT32_ARRAY(desc, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(avail, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(used, VirtIOPCIQueue, 2),
        VMSTATE_END_OF_LIST()
    }
};
 103
 104static bool virtio_pci_modern_state_needed(void *opaque)
 105{
 106    VirtIOPCIProxy *proxy = opaque;
 107
 108    return virtio_pci_modern(proxy);
 109}
 110
/* Subsection holding all modern-transport proxy state: feature select
 * windows, negotiated guest features (lo/hi), and the per-queue array.
 * Only emitted when virtio_pci_modern_state_needed() is true.
 */
static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
    .name = "virtio_pci/modern_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_pci_modern_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
        VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
        VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
                             vmstate_virtio_pci_modern_queue_state,
                             VirtIOPCIQueue),
        VMSTATE_END_OF_LIST()
    }
};
 126
/* Top-level "extra state" description: intentionally has no mandatory
 * fields of its own; all content lives in the optional modern-state
 * subsection so legacy-only streams stay unchanged.
 */
static const VMStateDescription vmstate_virtio_pci = {
    .name = "virtio_pci",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1, /* legacy loader compat field */
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_pci_modern_state_sub,
        NULL
    }
};
 140
 141static bool virtio_pci_has_extra_state(DeviceState *d)
 142{
 143    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 144
 145    return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA;
 146}
 147
 148static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
 149{
 150    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 151
 152    vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL);
 153}
 154
 155static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
 156{
 157    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 158
 159    return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1);
 160}
 161
 162static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
 163{
 164    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 165    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 166
 167    if (msix_present(&proxy->pci_dev))
 168        qemu_put_be16(f, virtio_queue_vector(vdev, n));
 169}
 170
/*
 * Restore transport config state saved by virtio_pci_save_config().
 * The read order (PCI config, MSI-X state, config vector) is wire
 * format and must not change.  Returns 0 on success or a negative
 * error code.
 */
static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    int ret;
    ret = pci_device_load(&proxy->pci_dev, f);
    if (ret) {
        return ret;
    }
    /* Drop all local vector use counts before reloading MSI-X state,
     * then re-acquire only the vector the stream says is in use. */
    msix_unuse_all_vectors(&proxy->pci_dev);
    msix_load(&proxy->pci_dev, f);
    if (msix_present(&proxy->pci_dev)) {
        qemu_get_be16s(f, &vdev->config_vector);
    } else {
        vdev->config_vector = VIRTIO_NO_VECTOR;
    }
    if (vdev->config_vector != VIRTIO_NO_VECTOR) {
        return msix_vector_use(&proxy->pci_dev, vdev->config_vector);
    }
    return 0;
}
 193
 194static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
 195{
 196    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 197    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 198
 199    uint16_t vector;
 200    if (msix_present(&proxy->pci_dev)) {
 201        qemu_get_be16s(f, &vector);
 202    } else {
 203        vector = VIRTIO_NO_VECTOR;
 204    }
 205    virtio_queue_set_vector(vdev, n, vector);
 206    if (vector != VIRTIO_NO_VECTOR) {
 207        return msix_vector_use(&proxy->pci_dev, vector);
 208    }
 209
 210    return 0;
 211}
 212
 213static bool virtio_pci_ioeventfd_enabled(DeviceState *d)
 214{
 215    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 216
 217    return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0;
 218}
 219
 220#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000
 221
 222static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy)
 223{
 224    return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ?
 225        QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4;
 226}
 227
 228static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier,
 229                                       int n, bool assign)
 230{
 231    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 232    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 233    VirtQueue *vq = virtio_get_queue(vdev, n);
 234    bool legacy = virtio_pci_legacy(proxy);
 235    bool modern = virtio_pci_modern(proxy);
 236    bool fast_mmio = kvm_ioeventfd_any_length_enabled();
 237    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
 238    MemoryRegion *modern_mr = &proxy->notify.mr;
 239    MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr;
 240    MemoryRegion *legacy_mr = &proxy->bar;
 241    hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) *
 242                         virtio_get_queue_index(vq);
 243    hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY;
 244
 245    if (assign) {
 246        if (modern) {
 247            if (fast_mmio) {
 248                memory_region_add_eventfd(modern_mr, modern_addr, 0,
 249                                          false, n, notifier);
 250            } else {
 251                memory_region_add_eventfd(modern_mr, modern_addr, 2,
 252                                          false, n, notifier);
 253            }
 254            if (modern_pio) {
 255                memory_region_add_eventfd(modern_notify_mr, 0, 2,
 256                                              true, n, notifier);
 257            }
 258        }
 259        if (legacy) {
 260            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
 261                                      true, n, notifier);
 262        }
 263    } else {
 264        if (modern) {
 265            if (fast_mmio) {
 266                memory_region_del_eventfd(modern_mr, modern_addr, 0,
 267                                          false, n, notifier);
 268            } else {
 269                memory_region_del_eventfd(modern_mr, modern_addr, 2,
 270                                          false, n, notifier);
 271            }
 272            if (modern_pio) {
 273                memory_region_del_eventfd(modern_notify_mr, 0, 2,
 274                                          true, n, notifier);
 275            }
 276        }
 277        if (legacy) {
 278            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
 279                                      true, n, notifier);
 280        }
 281    }
 282    return 0;
 283}
 284
/* Start host-side ioeventfd handling for all queues on this proxy's bus. */
static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
{
    virtio_bus_start_ioeventfd(&proxy->bus);
}
 289
/* Stop host-side ioeventfd handling; notifications fall back to vmexits. */
static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
{
    virtio_bus_stop_ioeventfd(&proxy->bus);
}
 294
 295static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 296{
 297    VirtIOPCIProxy *proxy = opaque;
 298    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 299    hwaddr pa;
 300
 301    switch (addr) {
 302    case VIRTIO_PCI_GUEST_FEATURES:
 303        /* Guest does not negotiate properly?  We have to assume nothing. */
 304        if (val & (1 << VIRTIO_F_BAD_FEATURE)) {
 305            val = virtio_bus_get_vdev_bad_features(&proxy->bus);
 306        }
 307        virtio_set_features(vdev, val);
 308        break;
 309    case VIRTIO_PCI_QUEUE_PFN:
 310        pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
 311        if (pa == 0) {
 312            virtio_pci_reset(DEVICE(proxy));
 313        }
 314        else
 315            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
 316        break;
 317    case VIRTIO_PCI_QUEUE_SEL:
 318        if (val < VIRTIO_QUEUE_MAX)
 319            vdev->queue_sel = val;
 320        break;
 321    case VIRTIO_PCI_QUEUE_NOTIFY:
 322        if (val < VIRTIO_QUEUE_MAX) {
 323            virtio_queue_notify(vdev, val);
 324        }
 325        break;
 326    case VIRTIO_PCI_STATUS:
 327        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
 328            virtio_pci_stop_ioeventfd(proxy);
 329        }
 330
 331        virtio_set_status(vdev, val & 0xFF);
 332
 333        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
 334            virtio_pci_start_ioeventfd(proxy);
 335        }
 336
 337        if (vdev->status == 0) {
 338            virtio_pci_reset(DEVICE(proxy));
 339        }
 340
 341        /* Linux before 2.6.34 drives the device without enabling
 342           the PCI device bus master bit. Enable it automatically
 343           for the guest. This is a PCI spec violation but so is
 344           initiating DMA with bus master bit clear. */
 345        if (val == (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER)) {
 346            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
 347                                     proxy->pci_dev.config[PCI_COMMAND] |
 348                                     PCI_COMMAND_MASTER, 1);
 349        }
 350        break;
 351    case VIRTIO_MSI_CONFIG_VECTOR:
 352        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
 353        /* Make it possible for guest to discover an error took place. */
 354        if (msix_vector_use(&proxy->pci_dev, val) < 0)
 355            val = VIRTIO_NO_VECTOR;
 356        vdev->config_vector = val;
 357        break;
 358    case VIRTIO_MSI_QUEUE_VECTOR:
 359        msix_vector_unuse(&proxy->pci_dev,
 360                          virtio_queue_vector(vdev, vdev->queue_sel));
 361        /* Make it possible for guest to discover an error took place. */
 362        if (msix_vector_use(&proxy->pci_dev, val) < 0)
 363            val = VIRTIO_NO_VECTOR;
 364        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
 365        break;
 366    default:
 367        error_report("%s: unexpected address 0x%x value 0x%x",
 368                     __func__, addr, val);
 369        break;
 370    }
 371}
 372
 373static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 374{
 375    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 376    uint32_t ret = 0xFFFFFFFF;
 377
 378    switch (addr) {
 379    case VIRTIO_PCI_HOST_FEATURES:
 380        ret = vdev->host_features;
 381        break;
 382    case VIRTIO_PCI_GUEST_FEATURES:
 383        ret = vdev->guest_features;
 384        break;
 385    case VIRTIO_PCI_QUEUE_PFN:
 386        ret = virtio_queue_get_addr(vdev, vdev->queue_sel)
 387              >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
 388        break;
 389    case VIRTIO_PCI_QUEUE_NUM:
 390        ret = virtio_queue_get_num(vdev, vdev->queue_sel);
 391        break;
 392    case VIRTIO_PCI_QUEUE_SEL:
 393        ret = vdev->queue_sel;
 394        break;
 395    case VIRTIO_PCI_STATUS:
 396        ret = vdev->status;
 397        break;
 398    case VIRTIO_PCI_ISR:
 399        /* reading from the ISR also clears it. */
 400        ret = atomic_xchg(&vdev->isr, 0);
 401        pci_irq_deassert(&proxy->pci_dev);
 402        break;
 403    case VIRTIO_MSI_CONFIG_VECTOR:
 404        ret = vdev->config_vector;
 405        break;
 406    case VIRTIO_MSI_QUEUE_VECTOR:
 407        ret = virtio_queue_vector(vdev, vdev->queue_sel);
 408        break;
 409    default:
 410        break;
 411    }
 412
 413    return ret;
 414}
 415
/*
 * Read from the legacy BAR: offsets below the common-header size go to
 * virtio_ioport_read(); the rest is device-specific config space, which
 * is target-endian (unlike the LE ioports), hence the swaps on
 * big-endian devices.
 */
static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    /* Header size depends on whether MSI-X is currently enabled. */
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    uint64_t val = 0;
    if (addr < config) {
        return virtio_ioport_read(proxy, addr);
    }
    addr -= config;

    switch (size) {
    case 1:
        val = virtio_config_readb(vdev, addr);
        break;
    case 2:
        val = virtio_config_readw(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        break;
    case 4:
        val = virtio_config_readl(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        break;
    }
    return val;
}
 447
/*
 * Write to the legacy BAR: offsets below the common-header size go to
 * virtio_ioport_write(); the rest is device-specific config space
 * (target-endian, see the comment in the body).
 */
static void virtio_pci_config_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    if (addr < config) {
        virtio_ioport_write(proxy, addr, val);
        return;
    }
    addr -= config;
    /*
     * Virtio-PCI is odd. Ioports are LE but config space is target native
     * endian.
     */
    switch (size) {
    case 1:
        virtio_config_writeb(vdev, addr, val);
        break;
    case 2:
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        virtio_config_writew(vdev, addr, val);
        break;
    case 4:
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        virtio_config_writel(vdev, addr, val);
        break;
    }
}
 481
/* MemoryRegionOps for the legacy BAR.  impl allows 1/2/4-byte accesses;
 * the region itself is little-endian (legacy virtio ioport convention). */
static const MemoryRegionOps virtio_pci_config_ops = {
    .read = virtio_pci_config_read,
    .write = virtio_pci_config_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 4,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
 491
 492static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy,
 493                                                 hwaddr *off, int len)
 494{
 495    int i;
 496    VirtIOPCIRegion *reg;
 497
 498    for (i = 0; i < ARRAY_SIZE(proxy->regs); ++i) {
 499        reg = &proxy->regs[i];
 500        if (*off >= reg->offset &&
 501            *off + len <= reg->offset + reg->size) {
 502            *off -= reg->offset;
 503            return &reg->mr;
 504        }
 505    }
 506
 507    return NULL;
 508}
 509
/* Below are generic functions to do memcpy from/to an address space,
 * without byteswaps, with input validation.
 *
 * As regular address_space_* APIs all do some kind of byteswap at least for
 * some host/target combinations, we are forced to explicitly convert to a
 * known-endianness integer value.
 * It doesn't really matter which endian format to go through, so the code
 * below selects the endian that causes the least amount of work on the given
 * host.
 *
 * Note: host pointer must be aligned.
 */
static
void virtio_address_space_write(VirtIOPCIProxy *proxy, hwaddr addr,
                                const uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    /* Silently drop accesses that land outside any capability region —
     * addr and len both come from the guest. */
    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    switch (len) {
    case 1:
        val = pci_get_byte(buf);
        break;
    case 2:
        val = pci_get_word(buf);
        break;
    case 4:
        val = pci_get_long(buf);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        return;
    }
    /* MO_LE matches the pci_get_* little-endian extraction above, so the
     * bytes reach the region exactly as the guest supplied them. */
    memory_region_dispatch_write(mr, addr, val, size_memop(len) | MO_LE,
                                 MEMTXATTRS_UNSPECIFIED);
}
 559
/*
 * Counterpart of virtio_address_space_write(): byte-preserving read of
 * @len bytes (1/2/4) from a capability region into @buf.  See the
 * block comment above virtio_address_space_write() for the endianness
 * rationale.  Out-of-range addresses and lengths are silently ignored
 * since both are guest-controlled.
 */
static void
virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr,
                          uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE,
                                MEMTXATTRS_UNSPECIFIED);
    /* MO_LE pairs with the pci_set_* little-endian stores below. */
    switch (len) {
    case 1:
        pci_set_byte(buf, val);
        break;
    case 2:
        pci_set_word(buf, val);
        break;
    case 4:
        pci_set_long(buf, val);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        break;
    }
}
 597
 598static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 599                                uint32_t val, int len)
 600{
 601    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
 602    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 603    struct virtio_pci_cfg_cap *cfg;
 604
 605    pci_default_write_config(pci_dev, address, val, len);
 606
 607    if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
 608        pcie_cap_flr_write_config(pci_dev, address, val, len);
 609    }
 610
 611    if (range_covers_byte(address, len, PCI_COMMAND) &&
 612        !(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
 613        virtio_pci_stop_ioeventfd(proxy);
 614        virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
 615    }
 616
 617    if (proxy->config_cap &&
 618        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
 619                                                                  pci_cfg_data),
 620                       sizeof cfg->pci_cfg_data)) {
 621        uint32_t off;
 622        uint32_t len;
 623
 624        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
 625        off = le32_to_cpu(cfg->cap.offset);
 626        len = le32_to_cpu(cfg->cap.length);
 627
 628        if (len == 1 || len == 2 || len == 4) {
 629            assert(len <= sizeof cfg->pci_cfg_data);
 630            virtio_address_space_write(proxy, off, cfg->pci_cfg_data, len);
 631        }
 632    }
 633}
 634
 635static uint32_t virtio_read_config(PCIDevice *pci_dev,
 636                                   uint32_t address, int len)
 637{
 638    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
 639    struct virtio_pci_cfg_cap *cfg;
 640
 641    if (proxy->config_cap &&
 642        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
 643                                                                  pci_cfg_data),
 644                       sizeof cfg->pci_cfg_data)) {
 645        uint32_t off;
 646        uint32_t len;
 647
 648        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
 649        off = le32_to_cpu(cfg->cap.offset);
 650        len = le32_to_cpu(cfg->cap.length);
 651
 652        if (len == 1 || len == 2 || len == 4) {
 653            assert(len <= sizeof cfg->pci_cfg_data);
 654            virtio_address_space_read(proxy, off, cfg->pci_cfg_data, len);
 655        }
 656    }
 657
 658    return pci_default_read_config(pci_dev, address, len);
 659}
 660
/*
 * Take a reference on the KVM MSI route for @vector, creating the route
 * on first use.  Routes are refcounted per vector (several queues may
 * share one vector).  Returns 0 or a negative error from route
 * allocation.  Note: @queue_no is currently unused here; kept for
 * symmetry with the irqfd helpers.
 */
static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                        unsigned int queue_no,
                                        unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    if (irqfd->users == 0) {
        /* First user: allocate the GSI route; its number is the return. */
        ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev);
        if (ret < 0) {
            return ret;
        }
        irqfd->virq = ret;
    }
    irqfd->users++;
    return 0;
}
 678
 679static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
 680                                             unsigned int vector)
 681{
 682    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 683    if (--irqfd->users == 0) {
 684        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
 685    }
 686}
 687
/*
 * Wire queue @queue_no's guest notifier eventfd directly to the KVM GSI
 * route already set up for @vector, so interrupts bypass QEMU entirely.
 * Returns the kvm_irqchip_add_irqfd_notifier_gsi() result (0 or negative).
 */
static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
                                 unsigned int queue_no,
                                 unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
}
 698
/*
 * Detach queue @queue_no's guest notifier from @vector's KVM GSI route.
 * Removal is expected to succeed (the irqfd was attached by
 * kvm_virtio_pci_irqfd_use()); failure indicates a bookkeeping bug,
 * hence the assert.
 */
static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
                                      unsigned int queue_no,
                                      unsigned int vector)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
    assert(ret == 0);
}
 712
/*
 * Set up KVM MSI routes (and, when the device supports guest-side
 * masking, irqfds) for the first @nvqs queues.  On any failure, every
 * queue set up so far is rolled back in reverse before returning the
 * error — the undo loop must mirror the setup loop exactly.
 * Returns 0 on success, negative errno on failure.
 */
static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
{
    PCIDevice *dev = &proxy->pci_dev;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    unsigned int vector;
    int ret, queue_no;

    for (queue_no = 0; queue_no < nvqs; queue_no++) {
        /* Queues are allocated contiguously; a zero-sized queue ends them. */
        if (!virtio_queue_get_num(vdev, queue_no)) {
            break;
        }
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
        if (ret < 0) {
            goto undo;
        }
        /* If guest supports masking, set up irqfd now.
         * Otherwise, delay until unmasked in the frontend.
         */
        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
            if (ret < 0) {
                kvm_virtio_pci_vq_vector_release(proxy, vector);
                goto undo;
            }
        }
    }
    return 0;

undo:
    /* Roll back queues [0, queue_no) in reverse order. */
    while (--queue_no >= 0) {
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
        }
        kvm_virtio_pci_vq_vector_release(proxy, vector);
    }
    return ret;
}
 759
/*
 * Tear down what kvm_virtio_pci_vector_use() set up for the first
 * @nvqs queues: the irqfd (only when guest-side masking is supported —
 * otherwise the frontend already removed it on mask) and the MSI route
 * reference.
 */
static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
{
    PCIDevice *dev = &proxy->pci_dev;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    unsigned int vector;
    int queue_no;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    for (queue_no = 0; queue_no < nvqs; queue_no++) {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            break;
        }
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        /* If guest supports masking, clean up irqfd now.
         * Otherwise, it was cleaned when masked in the frontend.
         */
        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
        }
        kvm_virtio_pci_vq_vector_release(proxy, vector);
    }
}
 785
/*
 * MSI-X unmask handler for one queue/vector pair.  Updates the KVM MSI
 * route if the vector's message changed while masked, then either
 * unmasks the frontend notifier (guest-maskable devices — the irqfd is
 * already wired) or attaches the irqfd now.  Returns 0 or a negative
 * error.
 */
static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
                                       unsigned int queue_no,
                                       unsigned int vector,
                                       MSIMessage msg)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    VirtIOIRQFD *irqfd;
    int ret = 0;

    if (proxy->vector_irqfd) {
        irqfd = &proxy->vector_irqfd[vector];
        /* Only touch the route when the MSI message actually changed. */
        if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
            ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg,
                                               &proxy->pci_dev);
            if (ret < 0) {
                return ret;
            }
            kvm_irqchip_commit_routes(kvm_state);
        }
    }

    /* If guest supports masking, irqfd is already setup, unmask it.
     * Otherwise, set it up now.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        k->guest_notifier_mask(vdev, queue_no, false);
        /* Test after unmasking to avoid losing events. */
        if (k->guest_notifier_pending &&
            k->guest_notifier_pending(vdev, queue_no)) {
            event_notifier_set(n);
        }
    } else {
        ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
    }
    return ret;
}
 825
 826static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
 827                                             unsigned int queue_no,
 828                                             unsigned int vector)
 829{
 830    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 831    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 832
 833    /* If guest supports masking, keep irqfd but mask it.
 834     * Otherwise, clean it up now.
 835     */ 
 836    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 837        k->guest_notifier_mask(vdev, queue_no, true);
 838    } else {
 839        kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
 840    }
 841}
 842
/*
 * msix_set_vector_notifiers() unmask callback: unmask every queue
 * bound to @vector.  If any queue fails, re-mask exactly the queues
 * already unmasked (tracked via the 'unmasked' counter) and return
 * the error.
 */
static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
                                    MSIMessage msg)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
    int ret, index, unmasked = 0;

    while (vq) {
        index = virtio_get_queue_index(vq);
        /* Queues are contiguous; a zero-sized queue ends the chain. */
        if (!virtio_queue_get_num(vdev, index)) {
            break;
        }
        if (index < proxy->nvqs_with_notifiers) {
            ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg);
            if (ret < 0) {
                goto undo;
            }
            ++unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }

    return 0;

undo:
    /* Walk the vector's queue list again, re-masking only as many
     * queues as were successfully unmasked above. */
    vq = virtio_vector_first_queue(vdev, vector);
    while (vq && unmasked >= 0) {
        index = virtio_get_queue_index(vq);
        if (index < proxy->nvqs_with_notifiers) {
            virtio_pci_vq_vector_mask(proxy, index, vector);
            --unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }
    return ret;
}
 880
 881static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
 882{
 883    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
 884    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 885    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
 886    int index;
 887
 888    while (vq) {
 889        index = virtio_get_queue_index(vq);
 890        if (!virtio_queue_get_num(vdev, index)) {
 891            break;
 892        }
 893        if (index < proxy->nvqs_with_notifiers) {
 894            virtio_pci_vq_vector_mask(proxy, index, vector);
 895        }
 896        vq = virtio_vector_next_queue(vq);
 897    }
 898}
 899
 900static void virtio_pci_vector_poll(PCIDevice *dev,
 901                                   unsigned int vector_start,
 902                                   unsigned int vector_end)
 903{
 904    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
 905    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 906    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 907    int queue_no;
 908    unsigned int vector;
 909    EventNotifier *notifier;
 910    VirtQueue *vq;
 911
 912    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
 913        if (!virtio_queue_get_num(vdev, queue_no)) {
 914            break;
 915        }
 916        vector = virtio_queue_vector(vdev, queue_no);
 917        if (vector < vector_start || vector >= vector_end ||
 918            !msix_is_masked(dev, vector)) {
 919            continue;
 920        }
 921        vq = virtio_get_queue(vdev, queue_no);
 922        notifier = virtio_queue_get_guest_notifier(vq);
 923        if (k->guest_notifier_pending) {
 924            if (k->guest_notifier_pending(vdev, queue_no)) {
 925                msix_set_pending(dev, vector);
 926            }
 927        } else if (event_notifier_test_and_clear(notifier)) {
 928            msix_set_pending(dev, vector);
 929        }
 930    }
 931}
 932
 933static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
 934                                         bool with_irqfd)
 935{
 936    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 937    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 938    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
 939    VirtQueue *vq = virtio_get_queue(vdev, n);
 940    EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
 941
 942    if (assign) {
 943        int r = event_notifier_init(notifier, 0);
 944        if (r < 0) {
 945            return r;
 946        }
 947        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
 948    } else {
 949        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
 950        event_notifier_cleanup(notifier);
 951    }
 952
 953    if (!msix_enabled(&proxy->pci_dev) &&
 954        vdev->use_guest_notifier_mask &&
 955        vdc->guest_notifier_mask) {
 956        vdc->guest_notifier_mask(vdev, n, !assign);
 957    }
 958
 959    return 0;
 960}
 961
 962static bool virtio_pci_query_guest_notifiers(DeviceState *d)
 963{
 964    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 965    return msix_enabled(&proxy->pci_dev);
 966}
 967
/*
 * Assign or deassign guest notifiers (and the MSI-X vector notifiers)
 * for the first @nvqs queues.  When KVM irqfd routing is available the
 * per-vector irqfd table is allocated and wired up as well.  Returns 0
 * on success or a negative error, undoing any partial assignment.
 */
static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int r, n;
    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
        kvm_msi_via_irqfd_enabled();

    nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);

    /* When deassigning, pass a consistent nvqs value
     * to avoid leaking notifiers.
     */
    assert(assign || nvqs == proxy->nvqs_with_notifiers);

    proxy->nvqs_with_notifiers = nvqs;

    /* Must unset vector notifier while guest notifier is still assigned */
    if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) {
        msix_unset_vector_notifiers(&proxy->pci_dev);
        if (proxy->vector_irqfd) {
            kvm_virtio_pci_vector_release(proxy, nvqs);
            g_free(proxy->vector_irqfd);
            proxy->vector_irqfd = NULL;
        }
    }

    for (n = 0; n < nvqs; n++) {
        /* A queue size of 0 marks the end of the configured queues. */
        if (!virtio_queue_get_num(vdev, n)) {
            break;
        }

        r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd);
        if (r < 0) {
            goto assign_error;
        }
    }

    /* Must set vector notifier after guest notifier has been assigned */
    if ((with_irqfd || k->guest_notifier_mask) && assign) {
        if (with_irqfd) {
            /* One slot per allocated MSI-X vector, zero-initialized. */
            proxy->vector_irqfd =
                g_malloc0(sizeof(*proxy->vector_irqfd) *
                          msix_nr_vectors_allocated(&proxy->pci_dev));
            r = kvm_virtio_pci_vector_use(proxy, nvqs);
            if (r < 0) {
                goto assign_error;
            }
        }
        r = msix_set_vector_notifiers(&proxy->pci_dev,
                                      virtio_pci_vector_unmask,
                                      virtio_pci_vector_mask,
                                      virtio_pci_vector_poll);
        if (r < 0) {
            goto notifiers_error;
        }
    }

    return 0;

notifiers_error:
    if (with_irqfd) {
        assert(assign);
        kvm_virtio_pci_vector_release(proxy, nvqs);
    }

assign_error:
    /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
    assert(assign);
    while (--n >= 0) {
        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
    }
    return r;
}
1043
1044static int virtio_pci_set_host_notifier_mr(DeviceState *d, int n,
1045                                           MemoryRegion *mr, bool assign)
1046{
1047    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
1048    int offset;
1049
1050    if (n >= VIRTIO_QUEUE_MAX || !virtio_pci_modern(proxy) ||
1051        virtio_pci_queue_mem_mult(proxy) != memory_region_size(mr)) {
1052        return -1;
1053    }
1054
1055    if (assign) {
1056        offset = virtio_pci_queue_mem_mult(proxy) * n;
1057        memory_region_add_subregion_overlap(&proxy->notify.mr, offset, mr, 1);
1058    } else {
1059        memory_region_del_subregion(&proxy->notify.mr, mr);
1060    }
1061
1062    return 0;
1063}
1064
1065static void virtio_pci_vmstate_change(DeviceState *d, bool running)
1066{
1067    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
1068    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1069
1070    if (running) {
1071        /* Old QEMU versions did not set bus master enable on status write.
1072         * Detect DRIVER set and enable it.
1073         */
1074        if ((proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION) &&
1075            (vdev->status & VIRTIO_CONFIG_S_DRIVER) &&
1076            !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1077            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
1078                                     proxy->pci_dev.config[PCI_COMMAND] |
1079                                     PCI_COMMAND_MASTER, 1);
1080        }
1081        virtio_pci_start_ioeventfd(proxy);
1082    } else {
1083        virtio_pci_stop_ioeventfd(proxy);
1084    }
1085}
1086
1087/*
1088 * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
1089 */
1090
1091static int virtio_pci_query_nvectors(DeviceState *d)
1092{
1093    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1094
1095    return proxy->nvectors;
1096}
1097
1098static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
1099{
1100    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1101    PCIDevice *dev = &proxy->pci_dev;
1102
1103    return pci_get_address_space(dev);
1104}
1105
1106static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
1107                                   struct virtio_pci_cap *cap)
1108{
1109    PCIDevice *dev = &proxy->pci_dev;
1110    int offset;
1111
1112    offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
1113                                cap->cap_len, &error_abort);
1114
1115    assert(cap->cap_len >= sizeof *cap);
1116    memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
1117           cap->cap_len - PCI_CAP_FLAGS);
1118
1119    return offset;
1120}
1121
/*
 * MMIO read handler for the virtio-modern common configuration
 * structure.  @addr is the register offset within the structure;
 * registers not listed below read as 0.
 */
static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t val = 0;
    int i;

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        val = proxy->dfselect;
        break;
    case VIRTIO_PCI_COMMON_DF:
        /* Only feature words 0 and 1 exist; others read as 0. */
        if (proxy->dfselect <= 1) {
            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

            /* Legacy-only feature bits are hidden from modern drivers. */
            val = (vdev->host_features & ~vdc->legacy_features) >>
                (32 * proxy->dfselect);
        }
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        val = proxy->gfselect;
        break;
    case VIRTIO_PCI_COMMON_GF:
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            val = proxy->guest_features[proxy->gfselect];
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        val = vdev->config_vector;
        break;
    case VIRTIO_PCI_COMMON_NUMQ:
        /* One past the highest queue index with a nonzero size. */
        for (i = 0; i < VIRTIO_QUEUE_MAX; ++i) {
            if (virtio_queue_get_num(vdev, i)) {
                val = i + 1;
            }
        }
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        val = vdev->status;
        break;
    case VIRTIO_PCI_COMMON_CFGGENERATION:
        val = vdev->generation;
        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        val = vdev->queue_sel;
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        val = virtio_queue_get_num(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        val = virtio_queue_vector(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        val = proxy->vqs[vdev->queue_sel].enabled;
        break;
    case VIRTIO_PCI_COMMON_Q_NOFF:
        /* Simply map queues in order */
        val = vdev->queue_sel;
        break;
    /* Ring addresses are kept as lo/hi 32-bit halves in proxy->vqs[]. */
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        val = proxy->vqs[vdev->queue_sel].desc[0];
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        val = proxy->vqs[vdev->queue_sel].desc[1];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        val = proxy->vqs[vdev->queue_sel].avail[0];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        val = proxy->vqs[vdev->queue_sel].avail[1];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        val = proxy->vqs[vdev->queue_sel].used[0];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        val = proxy->vqs[vdev->queue_sel].used[1];
        break;
    default:
        val = 0;
    }

    return val;
}
1206
/*
 * MMIO write handler for the virtio-modern common configuration
 * structure.  Writes to unknown register offsets are silently ignored.
 */
static void virtio_pci_common_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        proxy->dfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        proxy->gfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GF:
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            proxy->guest_features[proxy->gfselect] = val;
            /* Combine the two 32-bit words into the 64-bit feature set. */
            virtio_set_features(vdev,
                                (((uint64_t)proxy->guest_features[1]) << 32) |
                                proxy->guest_features[0]);
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
        /* Make it possible for guest to discover an error took place. */
        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
            val = VIRTIO_NO_VECTOR;
        }
        vdev->config_vector = val;
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        /* Stop ioeventfd before DRIVER_OK is dropped... */
        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
            virtio_pci_stop_ioeventfd(proxy);
        }

        virtio_set_status(vdev, val & 0xFF);

        /* ...and (re)start it only once DRIVER_OK is set. */
        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
            virtio_pci_start_ioeventfd(proxy);
        }

        /* Writing status 0 requests a full device reset. */
        if (vdev->status == 0) {
            virtio_pci_reset(DEVICE(proxy));
        }

        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        if (val < VIRTIO_QUEUE_MAX) {
            vdev->queue_sel = val;
        }
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        proxy->vqs[vdev->queue_sel].num = val;
        virtio_queue_set_num(vdev, vdev->queue_sel,
                             proxy->vqs[vdev->queue_sel].num);
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        msix_vector_unuse(&proxy->pci_dev,
                          virtio_queue_vector(vdev, vdev->queue_sel));
        /* Make it possible for guest to discover an error took place. */
        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
            val = VIRTIO_NO_VECTOR;
        }
        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        /* Commit the staged size and ring addresses to the core device. */
        virtio_queue_set_num(vdev, vdev->queue_sel,
                             proxy->vqs[vdev->queue_sel].num);
        virtio_queue_set_rings(vdev, vdev->queue_sel,
                       ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].desc[0],
                       ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].avail[0],
                       ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].used[0]);
        proxy->vqs[vdev->queue_sel].enabled = 1;
        break;
    /* Ring addresses are staged as lo/hi halves until Q_ENABLE. */
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        proxy->vqs[vdev->queue_sel].desc[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        proxy->vqs[vdev->queue_sel].desc[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        proxy->vqs[vdev->queue_sel].avail[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        proxy->vqs[vdev->queue_sel].avail[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        proxy->vqs[vdev->queue_sel].used[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        proxy->vqs[vdev->queue_sel].used[1] = val;
        break;
    default:
        break;
    }
}
1305
1306
/* Reads from the notify region carry no information; return 0. */
static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    return 0;
}
1312
1313static void virtio_pci_notify_write(void *opaque, hwaddr addr,
1314                                    uint64_t val, unsigned size)
1315{
1316    VirtIODevice *vdev = opaque;
1317    VirtIOPCIProxy *proxy = VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent);
1318    unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);
1319
1320    if (queue < VIRTIO_QUEUE_MAX) {
1321        virtio_queue_notify(vdev, queue);
1322    }
1323}
1324
1325static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
1326                                        uint64_t val, unsigned size)
1327{
1328    VirtIODevice *vdev = opaque;
1329    unsigned queue = val;
1330
1331    if (queue < VIRTIO_QUEUE_MAX) {
1332        virtio_queue_notify(vdev, queue);
1333    }
1334}
1335
1336static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
1337                                    unsigned size)
1338{
1339    VirtIOPCIProxy *proxy = opaque;
1340    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1341    uint64_t val = atomic_xchg(&vdev->isr, 0);
1342    pci_irq_deassert(&proxy->pci_dev);
1343
1344    return val;
1345}
1346
/* Writes to the ISR region are ignored; ISR is read-to-clear only. */
static void virtio_pci_isr_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
}
1351
1352static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr,
1353                                       unsigned size)
1354{
1355    VirtIODevice *vdev = opaque;
1356    uint64_t val = 0;
1357
1358    switch (size) {
1359    case 1:
1360        val = virtio_config_modern_readb(vdev, addr);
1361        break;
1362    case 2:
1363        val = virtio_config_modern_readw(vdev, addr);
1364        break;
1365    case 4:
1366        val = virtio_config_modern_readl(vdev, addr);
1367        break;
1368    }
1369    return val;
1370}
1371
1372static void virtio_pci_device_write(void *opaque, hwaddr addr,
1373                                    uint64_t val, unsigned size)
1374{
1375    VirtIODevice *vdev = opaque;
1376    switch (size) {
1377    case 1:
1378        virtio_config_modern_writeb(vdev, addr, val);
1379        break;
1380    case 2:
1381        virtio_config_modern_writew(vdev, addr, val);
1382        break;
1383    case 4:
1384        virtio_config_modern_writel(vdev, addr, val);
1385        break;
1386    }
1387}
1388
/*
 * Create the MMIO containers for the five virtio-modern structures
 * (common cfg, ISR, device cfg, MMIO notify, PIO notify).  The regions
 * are only initialized here; mapping into a BAR happens later via
 * virtio_pci_modern_region_map().  All regions are little-endian per
 * the virtio 1.0 spec.
 */
static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy)
{
    static const MemoryRegionOps common_ops = {
        .read = virtio_pci_common_read,
        .write = virtio_pci_common_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps isr_ops = {
        .read = virtio_pci_isr_read,
        .write = virtio_pci_isr_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps device_ops = {
        .read = virtio_pci_device_read,
        .write = virtio_pci_device_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps notify_ops = {
        .read = virtio_pci_notify_read,
        .write = virtio_pci_notify_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps notify_pio_ops = {
        .read = virtio_pci_notify_read,
        .write = virtio_pci_notify_write_pio,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };


    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
                          &common_ops,
                          proxy,
                          "virtio-pci-common",
                          proxy->common.size);

    memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
                          &isr_ops,
                          proxy,
                          "virtio-pci-isr",
                          proxy->isr.size);

    /* The device/notify handlers take the VirtIODevice as opaque. */
    memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
                          &device_ops,
                          virtio_bus_get_device(&proxy->bus),
                          "virtio-pci-device",
                          proxy->device.size);

    memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
                          &notify_ops,
                          virtio_bus_get_device(&proxy->bus),
                          "virtio-pci-notify",
                          proxy->notify.size);

    memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
                          &notify_pio_ops,
                          virtio_bus_get_device(&proxy->bus),
                          "virtio-pci-notify-pio",
                          proxy->notify_pio.size);
}
1468
1469static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy,
1470                                         VirtIOPCIRegion *region,
1471                                         struct virtio_pci_cap *cap,
1472                                         MemoryRegion *mr,
1473                                         uint8_t bar)
1474{
1475    memory_region_add_subregion(mr, region->offset, &region->mr);
1476
1477    cap->cfg_type = region->type;
1478    cap->bar = bar;
1479    cap->offset = cpu_to_le32(region->offset);
1480    cap->length = cpu_to_le32(region->size);
1481    virtio_pci_add_mem_cap(proxy, cap);
1482
1483}
1484
1485static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy,
1486                                             VirtIOPCIRegion *region,
1487                                             struct virtio_pci_cap *cap)
1488{
1489    virtio_pci_modern_region_map(proxy, region, cap,
1490                                 &proxy->modern_bar, proxy->modern_mem_bar_idx);
1491}
1492
1493static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy,
1494                                            VirtIOPCIRegion *region,
1495                                            struct virtio_pci_cap *cap)
1496{
1497    virtio_pci_modern_region_map(proxy, region, cap,
1498                                 &proxy->io_bar, proxy->modern_io_bar_idx);
1499}
1500
1501static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy,
1502                                               VirtIOPCIRegion *region)
1503{
1504    memory_region_del_subregion(&proxy->modern_bar,
1505                                &region->mr);
1506}
1507
1508static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy,
1509                                              VirtIOPCIRegion *region)
1510{
1511    memory_region_del_subregion(&proxy->io_bar,
1512                                &region->mr);
1513}
1514
1515static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
1516{
1517    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1518    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1519
1520    if (virtio_pci_modern(proxy)) {
1521        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
1522    }
1523
1524    virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE);
1525}
1526
/* This is called by virtio-bus just after the device is plugged. */
static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    VirtioBusState *bus = &proxy->bus;
    bool legacy = virtio_pci_legacy(proxy);
    bool modern;
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
    uint8_t *config;
    uint32_t size;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    /*
     * Virtio capabilities present without
     * VIRTIO_F_VERSION_1 confuses guests
     */
    if (!proxy->ignore_backend_features &&
            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
        virtio_pci_disable_modern(proxy);

        if (!legacy) {
            error_setg(errp, "Device doesn't support modern mode, and legacy"
                             " mode is disabled");
            error_append_hint(errp, "Set disable-legacy to off\n");

            return;
        }
    }

    /* Re-evaluate after the possible virtio_pci_disable_modern() above. */
    modern = virtio_pci_modern(proxy);

    config = proxy->pci_dev.config;
    if (proxy->class_code) {
        pci_config_set_class(config, proxy->class_code);
    }

    if (legacy) {
        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
                       " neither legacy nor transitional device");
            return ;
        }
        /*
         * Legacy and transitional devices use specific subsystem IDs.
         * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID)
         * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default.
         */
        pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
    } else {
        /* pure virtio-1.0 */
        pci_set_word(config + PCI_VENDOR_ID,
                     PCI_VENDOR_ID_REDHAT_QUMRANET);
        /* Modern-only device IDs start at 0x1040 plus the virtio device ID. */
        pci_set_word(config + PCI_DEVICE_ID,
                     0x1040 + virtio_bus_get_vdev_id(bus));
        pci_config_set_revision(config, 1);
    }
    config[PCI_INTERRUPT_PIN] = 1;


    if (modern) {
        struct virtio_pci_cap cap = {
            .cap_len = sizeof cap,
        };
        struct virtio_pci_notify_cap notify = {
            .cap.cap_len = sizeof notify,
            .notify_off_multiplier =
                cpu_to_le32(virtio_pci_queue_mem_mult(proxy)),
        };
        struct virtio_pci_cfg_cap cfg = {
            .cap.cap_len = sizeof cfg,
            .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG,
        };
        struct virtio_pci_notify_cap notify_pio = {
            .cap.cap_len = sizeof notify,
            .notify_off_multiplier = cpu_to_le32(0x0),
        };

        struct virtio_pci_cfg_cap *cfg_mask;

        virtio_pci_modern_regions_init(proxy);

        /* Map each modern structure and publish its vendor capability. */
        virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
        virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
        virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
        virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);

        if (modern_pio) {
            memory_region_init(&proxy->io_bar, OBJECT(proxy),
                               "virtio-pci-io", 0x4);

            pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
                             PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);

            virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
                                            &notify_pio.cap);
        }

        pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
                         PCI_BASE_ADDRESS_SPACE_MEMORY |
                         PCI_BASE_ADDRESS_MEM_PREFETCH |
                         PCI_BASE_ADDRESS_MEM_TYPE_64,
                         &proxy->modern_bar);

        /* Make the PCI_CFG capability's window fields guest-writable. */
        proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
        cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
        pci_set_byte(&cfg_mask->cap.bar, ~0x0);
        pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0);
        pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0);
        pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
    }

    if (proxy->nvectors) {
        int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
                                          proxy->msix_bar_idx, NULL);
        if (err) {
            /* Notice when a system that supports MSIx can't initialize it */
            if (err != -ENOTSUP) {
                warn_report("unable to init msix vectors to %" PRIu32,
                            proxy->nvectors);
            }
            proxy->nvectors = 0;
        }
    }

    proxy->pci_dev.config_write = virtio_write_config;
    proxy->pci_dev.config_read = virtio_read_config;

    if (legacy) {
        /* Legacy BAR covers the transport header plus the device config. */
        size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
            + virtio_bus_get_vdev_config_len(bus);
        size = pow2ceil(size);

        memory_region_init_io(&proxy->bar, OBJECT(proxy),
                              &virtio_pci_config_ops,
                              proxy, "virtio-pci", size);

        pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
                         PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
    }
}
1667
1668static void virtio_pci_device_unplugged(DeviceState *d)
1669{
1670    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1671    bool modern = virtio_pci_modern(proxy);
1672    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
1673
1674    virtio_pci_stop_ioeventfd(proxy);
1675
1676    if (modern) {
1677        virtio_pci_modern_mem_region_unmap(proxy, &proxy->common);
1678        virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr);
1679        virtio_pci_modern_mem_region_unmap(proxy, &proxy->device);
1680        virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify);
1681        if (modern_pio) {
1682            virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
1683        }
1684    }
1685}
1686
/*
 * PCIDevice realize: lay out the BARs and modern-structure offsets,
 * validate the modern/legacy mode combination, initialize PCIe
 * capabilities when on an express port, then create the virtio bus and
 * chain into the subclass realize hook.
 */
static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev);
    /* Express semantics only apply below a root bus on an express bus. */
    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
                     !pci_bus_is_root(pci_get_bus(pci_dev));

    if (kvm_enabled() && !kvm_has_many_ioeventfds()) {
        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
    }

    /*
     * virtio pci bar layout used by default.
     * subclasses can re-arrange things if needed.
     *
     *   region 0   --  virtio legacy io bar
     *   region 1   --  msi-x bar
     *   region 4+5 --  virtio modern memory (64bit) bar
     *
     */
    proxy->legacy_io_bar_idx  = 0;
    proxy->msix_bar_idx       = 1;
    proxy->modern_io_bar_idx  = 2;
    proxy->modern_mem_bar_idx = 4;

    /* Fixed offsets of the modern structures inside the modern BAR. */
    proxy->common.offset = 0x0;
    proxy->common.size = 0x1000;
    proxy->common.type = VIRTIO_PCI_CAP_COMMON_CFG;

    proxy->isr.offset = 0x1000;
    proxy->isr.size = 0x1000;
    proxy->isr.type = VIRTIO_PCI_CAP_ISR_CFG;

    proxy->device.offset = 0x2000;
    proxy->device.size = 0x1000;
    proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG;

    proxy->notify.offset = 0x3000;
    proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX;
    proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    proxy->notify_pio.offset = 0x0;
    proxy->notify_pio.size = 0x4;
    proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    /* subclasses can enforce modern, so do this unconditionally */
    memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci",
                       /* PCI BAR regions must be powers of 2 */
                       pow2ceil(proxy->notify.offset + proxy->notify.size));

    /* Default: legacy off on express ports, on elsewhere. */
    if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
        proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) {
        error_setg(errp, "device cannot work as neither modern nor legacy mode"
                   " is enabled");
        error_append_hint(errp, "Set either disable-modern or disable-legacy"
                          " to off\n");
        return;
    }

    if (pcie_port && pci_is_express(pci_dev)) {
        int pos;

        pos = pcie_endpoint_cap_init(pci_dev, 0);
        assert(pos > 0);

        pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
                                 PCI_PM_SIZEOF, errp);
        if (pos < 0) {
            return;
        }

        pci_dev->exp.pm_cap = pos;

        /*
         * Indicates that this function complies with revision 1.2 of the
         * PCI Power Management Interface Specification.
         */
        pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
            /* Init error enabling flags */
            pcie_cap_deverr_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_LNKCTL) {
            /* Init Link Control Register */
            pcie_cap_lnkctl_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
            /* Init Power Management Control Register */
            pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
                         PCI_PM_CTRL_STATE_MASK);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
            pcie_ats_init(pci_dev, 256);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
            /* Set Function Level Reset capability bit */
            pcie_cap_flr_init(pci_dev);
        }
    } else {
        /*
         * make future invocations of pci_is_express() return false
         * and pci_config_size() return PCI_CONFIG_SPACE_SIZE.
         */
        pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
    }

    virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
    if (k->realize) {
        k->realize(proxy, errp);
    }
}
1806
/*
 * PCIDeviceClass::exit hook: tear down the MSI-X state together with the
 * exclusive BAR that backs it (counterpart of the msix_init_exclusive_bar()
 * call made during realize).
 */
static void virtio_pci_exit(PCIDevice *pci_dev)
{
    msix_uninit_exclusive_bar(pci_dev);
}
1811
1812static void virtio_pci_reset(DeviceState *qdev)
1813{
1814    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
1815    VirtioBusState *bus = VIRTIO_BUS(&proxy->bus);
1816    PCIDevice *dev = PCI_DEVICE(qdev);
1817    int i;
1818
1819    virtio_pci_stop_ioeventfd(proxy);
1820    virtio_bus_reset(bus);
1821    msix_unuse_all_vectors(&proxy->pci_dev);
1822
1823    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1824        proxy->vqs[i].enabled = 0;
1825        proxy->vqs[i].num = 0;
1826        proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
1827        proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
1828        proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
1829    }
1830
1831    if (pci_is_express(dev)) {
1832        pcie_cap_deverr_reset(dev);
1833        pcie_cap_lnkctl_reset(dev);
1834
1835        pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0);
1836    }
1837}
1838
/*
 * Properties shared by every virtio-pci device.  The "x-" prefix marks
 * options that follow QEMU's convention for unstable/experimental knobs,
 * mostly toggled by machine-type compat settings.
 */
static Property virtio_pci_properties[] = {
    DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
    DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
    DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
    DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
    DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
    DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
                     ignore_backend_features, false),
    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_ATS_BIT, false),
    /* PCIe capability init knobs consumed by virtio_pci_realize() */
    DEFINE_PROP_BIT("x-pcie-deverr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, true),
    DEFINE_PROP_BIT("x-pcie-lnkctl-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true),
    DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
    DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1864
1865static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
1866{
1867    VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev);
1868    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
1869    PCIDevice *pci_dev = &proxy->pci_dev;
1870
1871    if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
1872        virtio_pci_modern(proxy)) {
1873        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
1874    }
1875
1876    vpciklass->parent_dc_realize(qdev, errp);
1877}
1878
1879static void virtio_pci_class_init(ObjectClass *klass, void *data)
1880{
1881    DeviceClass *dc = DEVICE_CLASS(klass);
1882    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1883    VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
1884
1885    dc->props = virtio_pci_properties;
1886    k->realize = virtio_pci_realize;
1887    k->exit = virtio_pci_exit;
1888    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
1889    k->revision = VIRTIO_PCI_ABI_VERSION;
1890    k->class_id = PCI_CLASS_OTHERS;
1891    device_class_set_parent_realize(dc, virtio_pci_dc_realize,
1892                                    &vpciklass->parent_dc_realize);
1893    dc->reset = virtio_pci_reset;
1894}
1895
/*
 * Abstract base type for all virtio-pci devices; concrete variants are
 * built on top of it via virtio_pci_types_register().
 */
static const TypeInfo virtio_pci_info = {
    .name          = TYPE_VIRTIO_PCI,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(VirtIOPCIProxy),
    .class_init    = virtio_pci_class_init,
    .class_size    = sizeof(VirtioPCIClass),
    .abstract      = true,
};
1904
/*
 * Extra properties for the generic (user-selectable mode) device types:
 * they let the user choose between the legacy and modern virtio
 * interfaces; "disable-legacy" defaults to auto (decided at realize
 * time based on the bus the device is plugged into).
 */
static Property virtio_pci_generic_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
                            ON_OFF_AUTO_AUTO),
    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
    DEFINE_PROP_END_OF_LIST(),
};
1911
1912static void virtio_pci_base_class_init(ObjectClass *klass, void *data)
1913{
1914    const VirtioPCIDeviceTypeInfo *t = data;
1915    if (t->class_init) {
1916        t->class_init(klass, NULL);
1917    }
1918}
1919
1920static void virtio_pci_generic_class_init(ObjectClass *klass, void *data)
1921{
1922    DeviceClass *dc = DEVICE_CLASS(klass);
1923
1924    dc->props = virtio_pci_generic_properties;
1925}
1926
1927static void virtio_pci_transitional_instance_init(Object *obj)
1928{
1929    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
1930
1931    proxy->disable_legacy = ON_OFF_AUTO_OFF;
1932    proxy->disable_modern = false;
1933}
1934
1935static void virtio_pci_non_transitional_instance_init(Object *obj)
1936{
1937    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
1938
1939    proxy->disable_legacy = ON_OFF_AUTO_ON;
1940    proxy->disable_modern = false;
1941}
1942
/*
 * Register the QOM types for one virtio-pci device family described by @t.
 *
 * Depending on which names @t provides, this registers:
 *  - an abstract base type (t->base_name, or a generated
 *    "<generic_name>-base-type" when no base name was given),
 *  - a generic device type (t->generic_name) carrying the
 *    disable-legacy/disable-modern properties,
 *  - a non-transitional (modern-only) variant (t->non_transitional_name),
 *  - a transitional variant (t->transitional_name).
 */
void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
{
    char *base_name = NULL;
    TypeInfo base_type_info = {
        .name          = t->base_name,
        .parent        = t->parent ? t->parent : TYPE_VIRTIO_PCI,
        .instance_size = t->instance_size,
        .instance_init = t->instance_init,
        .class_size    = t->class_size,
        .abstract      = true,
        .interfaces    = t->interfaces,
    };
    TypeInfo generic_type_info = {
        .name = t->generic_name,
        .parent = base_type_info.name,
        .class_init = virtio_pci_generic_class_init,
        .interfaces = (InterfaceInfo[]) {
            { INTERFACE_PCIE_DEVICE },
            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
            { }
        },
    };

    if (!base_type_info.name) {
        /* No base type -> register a single generic device type */
        /* use intermediate %s-base-type to add generic device props */
        base_name = g_strdup_printf("%s-base-type", t->generic_name);
        base_type_info.name = base_name;
        base_type_info.class_init = virtio_pci_generic_class_init;

        /* the family's class_init/class_data move to the generic type */
        generic_type_info.parent = base_name;
        generic_type_info.class_init = virtio_pci_base_class_init;
        generic_type_info.class_data = (void *)t;

        /* variants require an explicit base type to hang off */
        assert(!t->non_transitional_name);
        assert(!t->transitional_name);
    } else {
        base_type_info.class_init = virtio_pci_base_class_init;
        base_type_info.class_data = (void *)t;
    }

    type_register(&base_type_info);
    if (generic_type_info.name) {
        type_register(&generic_type_info);
    }

    if (t->non_transitional_name) {
        const TypeInfo non_transitional_type_info = {
            .name          = t->non_transitional_name,
            .parent        = base_type_info.name,
            .instance_init = virtio_pci_non_transitional_instance_init,
            .interfaces = (InterfaceInfo[]) {
                { INTERFACE_PCIE_DEVICE },
                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
                { }
            },
        };
        type_register(&non_transitional_type_info);
    }

    if (t->transitional_name) {
        const TypeInfo transitional_type_info = {
            .name          = t->transitional_name,
            .parent        = base_type_info.name,
            .instance_init = virtio_pci_transitional_instance_init,
            .interfaces = (InterfaceInfo[]) {
                /*
                 * Transitional virtio devices work only as Conventional PCI
                 * devices because they require PIO ports.
                 */
                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
                { }
            },
        };
        type_register(&transitional_type_info);
    }
    /* safe to free: type_register() copies the type name (see qom/object.h) */
    g_free(base_name);
}
2021
2022/* virtio-pci-bus */
2023
2024static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
2025                               VirtIOPCIProxy *dev)
2026{
2027    DeviceState *qdev = DEVICE(dev);
2028    char virtio_bus_name[] = "virtio-bus";
2029
2030    qbus_create_inplace(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev,
2031                        virtio_bus_name);
2032}
2033
/*
 * Wire up the VirtioBusClass callbacks to the virtio-pci transport
 * implementations defined earlier in this file.
 */
static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
{
    BusClass *bus_class = BUS_CLASS(klass);
    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
    /* exactly one virtio device sits behind each proxy */
    bus_class->max_dev = 1;
    k->notify = virtio_pci_notify;
    /* migration save/load hooks */
    k->save_config = virtio_pci_save_config;
    k->load_config = virtio_pci_load_config;
    k->save_queue = virtio_pci_save_queue;
    k->load_queue = virtio_pci_load_queue;
    k->save_extra_state = virtio_pci_save_extra_state;
    k->load_extra_state = virtio_pci_load_extra_state;
    k->has_extra_state = virtio_pci_has_extra_state;
    /* notifier plumbing */
    k->query_guest_notifiers = virtio_pci_query_guest_notifiers;
    k->set_guest_notifiers = virtio_pci_set_guest_notifiers;
    k->set_host_notifier_mr = virtio_pci_set_host_notifier_mr;
    k->vmstate_change = virtio_pci_vmstate_change;
    /* device plug/unplug lifecycle */
    k->pre_plugged = virtio_pci_pre_plugged;
    k->device_plugged = virtio_pci_device_plugged;
    k->device_unplugged = virtio_pci_device_unplugged;
    k->query_nvectors = virtio_pci_query_nvectors;
    /* ioeventfd and DMA */
    k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
    k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
    k->get_dma_as = virtio_pci_get_dma_as;
}
2059
/* Bus type created by virtio_pci_bus_new() for each proxy instance. */
static const TypeInfo virtio_pci_bus_info = {
    .name          = TYPE_VIRTIO_PCI_BUS,
    .parent        = TYPE_VIRTIO_BUS,
    .instance_size = sizeof(VirtioPCIBusState),
    .class_init    = virtio_pci_bus_class_init,
};
2066
/* Register the static base types; concrete device families register
 * themselves through virtio_pci_types_register(). */
static void virtio_pci_register_types(void)
{
    /* Base types: */
    type_register_static(&virtio_pci_bus_info);
    type_register_static(&virtio_pci_info);
}

type_init(virtio_pci_register_types)
2075
2076