qemu/hw/virtio/virtio-pci.c
<<
>>
Prefs
   1/*
   2 * Virtio PCI Bindings
   3 *
   4 * Copyright IBM, Corp. 2007
   5 * Copyright (c) 2009 CodeSourcery
   6 *
   7 * Authors:
   8 *  Anthony Liguori   <aliguori@us.ibm.com>
   9 *  Paul Brook        <paul@codesourcery.com>
  10 *
  11 * This work is licensed under the terms of the GNU GPL, version 2.  See
  12 * the COPYING file in the top-level directory.
  13 *
  14 * Contributions after 2012-01-13 are licensed under the terms of the
  15 * GNU GPL, version 2 or (at your option) any later version.
  16 */
  17
  18#include "qemu/osdep.h"
  19
  20#include "exec/memop.h"
  21#include "standard-headers/linux/virtio_pci.h"
  22#include "hw/boards.h"
  23#include "hw/virtio/virtio.h"
  24#include "migration/qemu-file-types.h"
  25#include "hw/pci/pci.h"
  26#include "hw/pci/pci_bus.h"
  27#include "hw/qdev-properties.h"
  28#include "qapi/error.h"
  29#include "qemu/error-report.h"
  30#include "qemu/log.h"
  31#include "qemu/module.h"
  32#include "hw/pci/msi.h"
  33#include "hw/pci/msix.h"
  34#include "hw/loader.h"
  35#include "sysemu/kvm.h"
  36#include "virtio-pci.h"
  37#include "qemu/range.h"
  38#include "hw/virtio/virtio-bus.h"
  39#include "qapi/visitor.h"
  40#include "sysemu/replay.h"
  41
  42#define VIRTIO_PCI_REGION_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_present(dev))
  43
  44#undef VIRTIO_PCI_CONFIG
  45
  46/* The remaining space is defined by each driver as the per-driver
  47 * configuration space */
  48#define VIRTIO_PCI_CONFIG_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))
  49
  50static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
  51                               VirtIOPCIProxy *dev);
  52static void virtio_pci_reset(DeviceState *qdev);
  53
  54/* virtio device */
  55/* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */
  56static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
  57{
  58    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
  59}
  60
  61/* DeviceState to VirtIOPCIProxy. Note: used on datapath,
  62 * be careful and test performance if you change this.
  63 */
  64static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
  65{
  66    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
  67}
  68
  69static void virtio_pci_notify(DeviceState *d, uint16_t vector)
  70{
  71    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
  72
  73    if (msix_enabled(&proxy->pci_dev))
  74        msix_notify(&proxy->pci_dev, vector);
  75    else {
  76        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
  77        pci_set_irq(&proxy->pci_dev, qatomic_read(&vdev->isr) & 1);
  78    }
  79}
  80
  81static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
  82{
  83    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
  84    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
  85
  86    pci_device_save(&proxy->pci_dev, f);
  87    msix_save(&proxy->pci_dev, f);
  88    if (msix_present(&proxy->pci_dev))
  89        qemu_put_be16(f, vdev->config_vector);
  90}
  91
  92static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
  93    .name = "virtio_pci/modern_queue_state",
  94    .version_id = 1,
  95    .minimum_version_id = 1,
  96    .fields = (VMStateField[]) {
  97        VMSTATE_UINT16(num, VirtIOPCIQueue),
  98        VMSTATE_UNUSED(1), /* enabled was stored as be16 */
  99        VMSTATE_BOOL(enabled, VirtIOPCIQueue),
 100        VMSTATE_UINT32_ARRAY(desc, VirtIOPCIQueue, 2),
 101        VMSTATE_UINT32_ARRAY(avail, VirtIOPCIQueue, 2),
 102        VMSTATE_UINT32_ARRAY(used, VirtIOPCIQueue, 2),
 103        VMSTATE_END_OF_LIST()
 104    }
 105};
 106
 107static bool virtio_pci_modern_state_needed(void *opaque)
 108{
 109    VirtIOPCIProxy *proxy = opaque;
 110
 111    return virtio_pci_modern(proxy);
 112}
 113
 114static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
 115    .name = "virtio_pci/modern_state",
 116    .version_id = 1,
 117    .minimum_version_id = 1,
 118    .needed = &virtio_pci_modern_state_needed,
 119    .fields = (VMStateField[]) {
 120        VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
 121        VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
 122        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
 123        VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
 124                             vmstate_virtio_pci_modern_queue_state,
 125                             VirtIOPCIQueue),
 126        VMSTATE_END_OF_LIST()
 127    }
 128};
 129
 130static const VMStateDescription vmstate_virtio_pci = {
 131    .name = "virtio_pci",
 132    .version_id = 1,
 133    .minimum_version_id = 1,
 134    .fields = (VMStateField[]) {
 135        VMSTATE_END_OF_LIST()
 136    },
 137    .subsections = (const VMStateDescription*[]) {
 138        &vmstate_virtio_pci_modern_state_sub,
 139        NULL
 140    }
 141};
 142
 143static bool virtio_pci_has_extra_state(DeviceState *d)
 144{
 145    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 146
 147    return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA;
 148}
 149
 150static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
 151{
 152    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 153
 154    vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL);
 155}
 156
 157static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
 158{
 159    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 160
 161    return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1);
 162}
 163
 164static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
 165{
 166    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 167    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 168
 169    if (msix_present(&proxy->pci_dev))
 170        qemu_put_be16(f, virtio_queue_vector(vdev, n));
 171}
 172
 173static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
 174{
 175    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 176    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 177
 178    int ret;
 179    ret = pci_device_load(&proxy->pci_dev, f);
 180    if (ret) {
 181        return ret;
 182    }
 183    msix_unuse_all_vectors(&proxy->pci_dev);
 184    msix_load(&proxy->pci_dev, f);
 185    if (msix_present(&proxy->pci_dev)) {
 186        qemu_get_be16s(f, &vdev->config_vector);
 187    } else {
 188        vdev->config_vector = VIRTIO_NO_VECTOR;
 189    }
 190    if (vdev->config_vector != VIRTIO_NO_VECTOR) {
 191        return msix_vector_use(&proxy->pci_dev, vdev->config_vector);
 192    }
 193    return 0;
 194}
 195
 196static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
 197{
 198    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 199    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 200
 201    uint16_t vector;
 202    if (msix_present(&proxy->pci_dev)) {
 203        qemu_get_be16s(f, &vector);
 204    } else {
 205        vector = VIRTIO_NO_VECTOR;
 206    }
 207    virtio_queue_set_vector(vdev, n, vector);
 208    if (vector != VIRTIO_NO_VECTOR) {
 209        return msix_vector_use(&proxy->pci_dev, vector);
 210    }
 211
 212    return 0;
 213}
 214
 215static bool virtio_pci_ioeventfd_enabled(DeviceState *d)
 216{
 217    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 218
 219    return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0;
 220}
 221
 222#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000
 223
 224static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy)
 225{
 226    return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ?
 227        QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4;
 228}
 229
 230static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier,
 231                                       int n, bool assign)
 232{
 233    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 234    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 235    VirtQueue *vq = virtio_get_queue(vdev, n);
 236    bool legacy = virtio_pci_legacy(proxy);
 237    bool modern = virtio_pci_modern(proxy);
 238    bool fast_mmio = kvm_ioeventfd_any_length_enabled();
 239    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
 240    MemoryRegion *modern_mr = &proxy->notify.mr;
 241    MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr;
 242    MemoryRegion *legacy_mr = &proxy->bar;
 243    hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) *
 244                         virtio_get_queue_index(vq);
 245    hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY;
 246
 247    if (assign) {
 248        if (modern) {
 249            if (fast_mmio) {
 250                memory_region_add_eventfd(modern_mr, modern_addr, 0,
 251                                          false, n, notifier);
 252            } else {
 253                memory_region_add_eventfd(modern_mr, modern_addr, 2,
 254                                          false, n, notifier);
 255            }
 256            if (modern_pio) {
 257                memory_region_add_eventfd(modern_notify_mr, 0, 2,
 258                                              true, n, notifier);
 259            }
 260        }
 261        if (legacy) {
 262            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
 263                                      true, n, notifier);
 264        }
 265    } else {
 266        if (modern) {
 267            if (fast_mmio) {
 268                memory_region_del_eventfd(modern_mr, modern_addr, 0,
 269                                          false, n, notifier);
 270            } else {
 271                memory_region_del_eventfd(modern_mr, modern_addr, 2,
 272                                          false, n, notifier);
 273            }
 274            if (modern_pio) {
 275                memory_region_del_eventfd(modern_notify_mr, 0, 2,
 276                                          true, n, notifier);
 277            }
 278        }
 279        if (legacy) {
 280            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
 281                                      true, n, notifier);
 282        }
 283    }
 284    return 0;
 285}
 286
 287static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
 288{
 289    virtio_bus_start_ioeventfd(&proxy->bus);
 290}
 291
 292static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
 293{
 294    virtio_bus_stop_ioeventfd(&proxy->bus);
 295}
 296
 297static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 298{
 299    VirtIOPCIProxy *proxy = opaque;
 300    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 301    hwaddr pa;
 302
 303    switch (addr) {
 304    case VIRTIO_PCI_GUEST_FEATURES:
 305        /* Guest does not negotiate properly?  We have to assume nothing. */
 306        if (val & (1 << VIRTIO_F_BAD_FEATURE)) {
 307            val = virtio_bus_get_vdev_bad_features(&proxy->bus);
 308        }
 309        virtio_set_features(vdev, val);
 310        break;
 311    case VIRTIO_PCI_QUEUE_PFN:
 312        pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
 313        if (pa == 0) {
 314            virtio_pci_reset(DEVICE(proxy));
 315        }
 316        else
 317            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
 318        break;
 319    case VIRTIO_PCI_QUEUE_SEL:
 320        if (val < VIRTIO_QUEUE_MAX)
 321            vdev->queue_sel = val;
 322        break;
 323    case VIRTIO_PCI_QUEUE_NOTIFY:
 324        if (val < VIRTIO_QUEUE_MAX) {
 325            virtio_queue_notify(vdev, val);
 326        }
 327        break;
 328    case VIRTIO_PCI_STATUS:
 329        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
 330            virtio_pci_stop_ioeventfd(proxy);
 331        }
 332
 333        virtio_set_status(vdev, val & 0xFF);
 334
 335        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
 336            virtio_pci_start_ioeventfd(proxy);
 337        }
 338
 339        if (vdev->status == 0) {
 340            virtio_pci_reset(DEVICE(proxy));
 341        }
 342
 343        /* Linux before 2.6.34 drives the device without enabling
 344           the PCI device bus master bit. Enable it automatically
 345           for the guest. This is a PCI spec violation but so is
 346           initiating DMA with bus master bit clear. */
 347        if (val == (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER)) {
 348            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
 349                                     proxy->pci_dev.config[PCI_COMMAND] |
 350                                     PCI_COMMAND_MASTER, 1);
 351        }
 352        break;
 353    case VIRTIO_MSI_CONFIG_VECTOR:
 354        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
 355        /* Make it possible for guest to discover an error took place. */
 356        if (msix_vector_use(&proxy->pci_dev, val) < 0)
 357            val = VIRTIO_NO_VECTOR;
 358        vdev->config_vector = val;
 359        break;
 360    case VIRTIO_MSI_QUEUE_VECTOR:
 361        msix_vector_unuse(&proxy->pci_dev,
 362                          virtio_queue_vector(vdev, vdev->queue_sel));
 363        /* Make it possible for guest to discover an error took place. */
 364        if (msix_vector_use(&proxy->pci_dev, val) < 0)
 365            val = VIRTIO_NO_VECTOR;
 366        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
 367        break;
 368    default:
 369        qemu_log_mask(LOG_GUEST_ERROR,
 370                      "%s: unexpected address 0x%x value 0x%x\n",
 371                      __func__, addr, val);
 372        break;
 373    }
 374}
 375
 376static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 377{
 378    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 379    uint32_t ret = 0xFFFFFFFF;
 380
 381    switch (addr) {
 382    case VIRTIO_PCI_HOST_FEATURES:
 383        ret = vdev->host_features;
 384        break;
 385    case VIRTIO_PCI_GUEST_FEATURES:
 386        ret = vdev->guest_features;
 387        break;
 388    case VIRTIO_PCI_QUEUE_PFN:
 389        ret = virtio_queue_get_addr(vdev, vdev->queue_sel)
 390              >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
 391        break;
 392    case VIRTIO_PCI_QUEUE_NUM:
 393        ret = virtio_queue_get_num(vdev, vdev->queue_sel);
 394        break;
 395    case VIRTIO_PCI_QUEUE_SEL:
 396        ret = vdev->queue_sel;
 397        break;
 398    case VIRTIO_PCI_STATUS:
 399        ret = vdev->status;
 400        break;
 401    case VIRTIO_PCI_ISR:
 402        /* reading from the ISR also clears it. */
 403        ret = qatomic_xchg(&vdev->isr, 0);
 404        pci_irq_deassert(&proxy->pci_dev);
 405        break;
 406    case VIRTIO_MSI_CONFIG_VECTOR:
 407        ret = vdev->config_vector;
 408        break;
 409    case VIRTIO_MSI_QUEUE_VECTOR:
 410        ret = virtio_queue_vector(vdev, vdev->queue_sel);
 411        break;
 412    default:
 413        break;
 414    }
 415
 416    return ret;
 417}
 418
 419static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
 420                                       unsigned size)
 421{
 422    VirtIOPCIProxy *proxy = opaque;
 423    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 424    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
 425    uint64_t val = 0;
 426
 427    if (vdev == NULL) {
 428        return UINT64_MAX;
 429    }
 430
 431    if (addr < config) {
 432        return virtio_ioport_read(proxy, addr);
 433    }
 434    addr -= config;
 435
 436    switch (size) {
 437    case 1:
 438        val = virtio_config_readb(vdev, addr);
 439        break;
 440    case 2:
 441        val = virtio_config_readw(vdev, addr);
 442        if (virtio_is_big_endian(vdev)) {
 443            val = bswap16(val);
 444        }
 445        break;
 446    case 4:
 447        val = virtio_config_readl(vdev, addr);
 448        if (virtio_is_big_endian(vdev)) {
 449            val = bswap32(val);
 450        }
 451        break;
 452    }
 453    return val;
 454}
 455
 456static void virtio_pci_config_write(void *opaque, hwaddr addr,
 457                                    uint64_t val, unsigned size)
 458{
 459    VirtIOPCIProxy *proxy = opaque;
 460    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
 461    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 462
 463    if (vdev == NULL) {
 464        return;
 465    }
 466
 467    if (addr < config) {
 468        virtio_ioport_write(proxy, addr, val);
 469        return;
 470    }
 471    addr -= config;
 472    /*
 473     * Virtio-PCI is odd. Ioports are LE but config space is target native
 474     * endian.
 475     */
 476    switch (size) {
 477    case 1:
 478        virtio_config_writeb(vdev, addr, val);
 479        break;
 480    case 2:
 481        if (virtio_is_big_endian(vdev)) {
 482            val = bswap16(val);
 483        }
 484        virtio_config_writew(vdev, addr, val);
 485        break;
 486    case 4:
 487        if (virtio_is_big_endian(vdev)) {
 488            val = bswap32(val);
 489        }
 490        virtio_config_writel(vdev, addr, val);
 491        break;
 492    }
 493}
 494
 495static const MemoryRegionOps virtio_pci_config_ops = {
 496    .read = virtio_pci_config_read,
 497    .write = virtio_pci_config_write,
 498    .impl = {
 499        .min_access_size = 1,
 500        .max_access_size = 4,
 501    },
 502    .endianness = DEVICE_LITTLE_ENDIAN,
 503};
 504
 505static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy,
 506                                                 hwaddr *off, int len)
 507{
 508    int i;
 509    VirtIOPCIRegion *reg;
 510
 511    for (i = 0; i < ARRAY_SIZE(proxy->regs); ++i) {
 512        reg = &proxy->regs[i];
 513        if (*off >= reg->offset &&
 514            *off + len <= reg->offset + reg->size) {
 515            *off -= reg->offset;
 516            return &reg->mr;
 517        }
 518    }
 519
 520    return NULL;
 521}
 522
 523/* Below are generic functions to do memcpy from/to an address space,
 524 * without byteswaps, with input validation.
 525 *
 526 * As regular address_space_* APIs all do some kind of byteswap at least for
 527 * some host/target combinations, we are forced to explicitly convert to a
 528 * known-endianness integer value.
 529 * It doesn't really matter which endian format to go through, so the code
 530 * below selects the endian that causes the least amount of work on the given
 531 * host.
 532 *
 533 * Note: host pointer must be aligned.
 534 */
 535static
 536void virtio_address_space_write(VirtIOPCIProxy *proxy, hwaddr addr,
 537                                const uint8_t *buf, int len)
 538{
 539    uint64_t val;
 540    MemoryRegion *mr;
 541
 542    /* address_space_* APIs assume an aligned address.
 543     * As address is under guest control, handle illegal values.
 544     */
 545    addr &= ~(len - 1);
 546
 547    mr = virtio_address_space_lookup(proxy, &addr, len);
 548    if (!mr) {
 549        return;
 550    }
 551
 552    /* Make sure caller aligned buf properly */
 553    assert(!(((uintptr_t)buf) & (len - 1)));
 554
 555    switch (len) {
 556    case 1:
 557        val = pci_get_byte(buf);
 558        break;
 559    case 2:
 560        val = pci_get_word(buf);
 561        break;
 562    case 4:
 563        val = pci_get_long(buf);
 564        break;
 565    default:
 566        /* As length is under guest control, handle illegal values. */
 567        return;
 568    }
 569    memory_region_dispatch_write(mr, addr, val, size_memop(len) | MO_LE,
 570                                 MEMTXATTRS_UNSPECIFIED);
 571}
 572
 573static void
 574virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr,
 575                          uint8_t *buf, int len)
 576{
 577    uint64_t val;
 578    MemoryRegion *mr;
 579
 580    /* address_space_* APIs assume an aligned address.
 581     * As address is under guest control, handle illegal values.
 582     */
 583    addr &= ~(len - 1);
 584
 585    mr = virtio_address_space_lookup(proxy, &addr, len);
 586    if (!mr) {
 587        return;
 588    }
 589
 590    /* Make sure caller aligned buf properly */
 591    assert(!(((uintptr_t)buf) & (len - 1)));
 592
 593    memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE,
 594                                MEMTXATTRS_UNSPECIFIED);
 595    switch (len) {
 596    case 1:
 597        pci_set_byte(buf, val);
 598        break;
 599    case 2:
 600        pci_set_word(buf, val);
 601        break;
 602    case 4:
 603        pci_set_long(buf, val);
 604        break;
 605    default:
 606        /* As length is under guest control, handle illegal values. */
 607        break;
 608    }
 609}
 610
 611static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 612                                uint32_t val, int len)
 613{
 614    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
 615    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 616    struct virtio_pci_cfg_cap *cfg;
 617
 618    pci_default_write_config(pci_dev, address, val, len);
 619
 620    if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
 621        pcie_cap_flr_write_config(pci_dev, address, val, len);
 622    }
 623
 624    if (range_covers_byte(address, len, PCI_COMMAND)) {
 625        if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
 626            virtio_set_disabled(vdev, true);
 627            virtio_pci_stop_ioeventfd(proxy);
 628            virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
 629        } else {
 630            virtio_set_disabled(vdev, false);
 631        }
 632    }
 633
 634    if (proxy->config_cap &&
 635        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
 636                                                                  pci_cfg_data),
 637                       sizeof cfg->pci_cfg_data)) {
 638        uint32_t off;
 639        uint32_t len;
 640
 641        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
 642        off = le32_to_cpu(cfg->cap.offset);
 643        len = le32_to_cpu(cfg->cap.length);
 644
 645        if (len == 1 || len == 2 || len == 4) {
 646            assert(len <= sizeof cfg->pci_cfg_data);
 647            virtio_address_space_write(proxy, off, cfg->pci_cfg_data, len);
 648        }
 649    }
 650}
 651
 652static uint32_t virtio_read_config(PCIDevice *pci_dev,
 653                                   uint32_t address, int len)
 654{
 655    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
 656    struct virtio_pci_cfg_cap *cfg;
 657
 658    if (proxy->config_cap &&
 659        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
 660                                                                  pci_cfg_data),
 661                       sizeof cfg->pci_cfg_data)) {
 662        uint32_t off;
 663        uint32_t len;
 664
 665        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
 666        off = le32_to_cpu(cfg->cap.offset);
 667        len = le32_to_cpu(cfg->cap.length);
 668
 669        if (len == 1 || len == 2 || len == 4) {
 670            assert(len <= sizeof cfg->pci_cfg_data);
 671            virtio_address_space_read(proxy, off, cfg->pci_cfg_data, len);
 672        }
 673    }
 674
 675    return pci_default_read_config(pci_dev, address, len);
 676}
 677
 678static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
 679                                        unsigned int queue_no,
 680                                        unsigned int vector)
 681{
 682    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 683    int ret;
 684
 685    if (irqfd->users == 0) {
 686        KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
 687        ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev);
 688        if (ret < 0) {
 689            return ret;
 690        }
 691        kvm_irqchip_commit_route_changes(&c);
 692        irqfd->virq = ret;
 693    }
 694    irqfd->users++;
 695    return 0;
 696}
 697
 698static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
 699                                             unsigned int vector)
 700{
 701    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 702    if (--irqfd->users == 0) {
 703        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
 704    }
 705}
 706
 707static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
 708                                 unsigned int queue_no,
 709                                 unsigned int vector)
 710{
 711    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 712    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 713    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
 714    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
 715    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
 716}
 717
 718static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
 719                                      unsigned int queue_no,
 720                                      unsigned int vector)
 721{
 722    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 723    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
 724    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
 725    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 726    int ret;
 727
 728    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
 729    assert(ret == 0);
 730}
 731
 732static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
 733{
 734    PCIDevice *dev = &proxy->pci_dev;
 735    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 736    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 737    unsigned int vector;
 738    int ret, queue_no;
 739
 740    for (queue_no = 0; queue_no < nvqs; queue_no++) {
 741        if (!virtio_queue_get_num(vdev, queue_no)) {
 742            break;
 743        }
 744        vector = virtio_queue_vector(vdev, queue_no);
 745        if (vector >= msix_nr_vectors_allocated(dev)) {
 746            continue;
 747        }
 748        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
 749        if (ret < 0) {
 750            goto undo;
 751        }
 752        /* If guest supports masking, set up irqfd now.
 753         * Otherwise, delay until unmasked in the frontend.
 754         */
 755        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 756            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
 757            if (ret < 0) {
 758                kvm_virtio_pci_vq_vector_release(proxy, vector);
 759                goto undo;
 760            }
 761        }
 762    }
 763    return 0;
 764
 765undo:
 766    while (--queue_no >= 0) {
 767        vector = virtio_queue_vector(vdev, queue_no);
 768        if (vector >= msix_nr_vectors_allocated(dev)) {
 769            continue;
 770        }
 771        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 772            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
 773        }
 774        kvm_virtio_pci_vq_vector_release(proxy, vector);
 775    }
 776    return ret;
 777}
 778
 779static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
 780{
 781    PCIDevice *dev = &proxy->pci_dev;
 782    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 783    unsigned int vector;
 784    int queue_no;
 785    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 786
 787    for (queue_no = 0; queue_no < nvqs; queue_no++) {
 788        if (!virtio_queue_get_num(vdev, queue_no)) {
 789            break;
 790        }
 791        vector = virtio_queue_vector(vdev, queue_no);
 792        if (vector >= msix_nr_vectors_allocated(dev)) {
 793            continue;
 794        }
 795        /* If guest supports masking, clean up irqfd now.
 796         * Otherwise, it was cleaned when masked in the frontend.
 797         */
 798        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 799            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
 800        }
 801        kvm_virtio_pci_vq_vector_release(proxy, vector);
 802    }
 803}
 804
 805static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
 806                                       unsigned int queue_no,
 807                                       unsigned int vector,
 808                                       MSIMessage msg)
 809{
 810    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 811    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 812    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
 813    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
 814    VirtIOIRQFD *irqfd;
 815    int ret = 0;
 816
 817    if (proxy->vector_irqfd) {
 818        irqfd = &proxy->vector_irqfd[vector];
 819        if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
 820            ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg,
 821                                               &proxy->pci_dev);
 822            if (ret < 0) {
 823                return ret;
 824            }
 825            kvm_irqchip_commit_routes(kvm_state);
 826        }
 827    }
 828
 829    /* If guest supports masking, irqfd is already setup, unmask it.
 830     * Otherwise, set it up now.
 831     */
 832    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 833        k->guest_notifier_mask(vdev, queue_no, false);
 834        /* Test after unmasking to avoid losing events. */
 835        if (k->guest_notifier_pending &&
 836            k->guest_notifier_pending(vdev, queue_no)) {
 837            event_notifier_set(n);
 838        }
 839    } else {
 840        ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
 841    }
 842    return ret;
 843}
 844
 845static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
 846                                             unsigned int queue_no,
 847                                             unsigned int vector)
 848{
 849    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 850    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 851
 852    /* If guest supports masking, keep irqfd but mask it.
 853     * Otherwise, clean it up now.
 854     */ 
 855    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 856        k->guest_notifier_mask(vdev, queue_no, true);
 857    } else {
 858        kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
 859    }
 860}
 861
 862static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
 863                                    MSIMessage msg)
 864{
 865    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
 866    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 867    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
 868    int ret, index, unmasked = 0;
 869
 870    while (vq) {
 871        index = virtio_get_queue_index(vq);
 872        if (!virtio_queue_get_num(vdev, index)) {
 873            break;
 874        }
 875        if (index < proxy->nvqs_with_notifiers) {
 876            ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg);
 877            if (ret < 0) {
 878                goto undo;
 879            }
 880            ++unmasked;
 881        }
 882        vq = virtio_vector_next_queue(vq);
 883    }
 884
 885    return 0;
 886
 887undo:
 888    vq = virtio_vector_first_queue(vdev, vector);
 889    while (vq && unmasked >= 0) {
 890        index = virtio_get_queue_index(vq);
 891        if (index < proxy->nvqs_with_notifiers) {
 892            virtio_pci_vq_vector_mask(proxy, index, vector);
 893            --unmasked;
 894        }
 895        vq = virtio_vector_next_queue(vq);
 896    }
 897    return ret;
 898}
 899
 900static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
 901{
 902    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
 903    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 904    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
 905    int index;
 906
 907    while (vq) {
 908        index = virtio_get_queue_index(vq);
 909        if (!virtio_queue_get_num(vdev, index)) {
 910            break;
 911        }
 912        if (index < proxy->nvqs_with_notifiers) {
 913            virtio_pci_vq_vector_mask(proxy, index, vector);
 914        }
 915        vq = virtio_vector_next_queue(vq);
 916    }
 917}
 918
 919static void virtio_pci_vector_poll(PCIDevice *dev,
 920                                   unsigned int vector_start,
 921                                   unsigned int vector_end)
 922{
 923    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
 924    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 925    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 926    int queue_no;
 927    unsigned int vector;
 928    EventNotifier *notifier;
 929    VirtQueue *vq;
 930
 931    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
 932        if (!virtio_queue_get_num(vdev, queue_no)) {
 933            break;
 934        }
 935        vector = virtio_queue_vector(vdev, queue_no);
 936        if (vector < vector_start || vector >= vector_end ||
 937            !msix_is_masked(dev, vector)) {
 938            continue;
 939        }
 940        vq = virtio_get_queue(vdev, queue_no);
 941        notifier = virtio_queue_get_guest_notifier(vq);
 942        if (k->guest_notifier_pending) {
 943            if (k->guest_notifier_pending(vdev, queue_no)) {
 944                msix_set_pending(dev, vector);
 945            }
 946        } else if (event_notifier_test_and_clear(notifier)) {
 947            msix_set_pending(dev, vector);
 948        }
 949    }
 950}
 951
 952static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
 953                                         bool with_irqfd)
 954{
 955    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 956    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 957    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
 958    VirtQueue *vq = virtio_get_queue(vdev, n);
 959    EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
 960
 961    if (assign) {
 962        int r = event_notifier_init(notifier, 0);
 963        if (r < 0) {
 964            return r;
 965        }
 966        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
 967    } else {
 968        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
 969        event_notifier_cleanup(notifier);
 970    }
 971
 972    if (!msix_enabled(&proxy->pci_dev) &&
 973        vdev->use_guest_notifier_mask &&
 974        vdc->guest_notifier_mask) {
 975        vdc->guest_notifier_mask(vdev, n, !assign);
 976    }
 977
 978    return 0;
 979}
 980
 981static bool virtio_pci_query_guest_notifiers(DeviceState *d)
 982{
 983    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 984    return msix_enabled(&proxy->pci_dev);
 985}
 986
 987static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
 988{
 989    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 990    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 991    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 992    int r, n;
 993    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
 994        kvm_msi_via_irqfd_enabled();
 995
 996    nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);
 997
 998    /* When deassigning, pass a consistent nvqs value
 999     * to avoid leaking notifiers.
1000     */
1001    assert(assign || nvqs == proxy->nvqs_with_notifiers);
1002
1003    proxy->nvqs_with_notifiers = nvqs;
1004
1005    /* Must unset vector notifier while guest notifier is still assigned */
1006    if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) {
1007        msix_unset_vector_notifiers(&proxy->pci_dev);
1008        if (proxy->vector_irqfd) {
1009            kvm_virtio_pci_vector_release(proxy, nvqs);
1010            g_free(proxy->vector_irqfd);
1011            proxy->vector_irqfd = NULL;
1012        }
1013    }
1014
1015    for (n = 0; n < nvqs; n++) {
1016        if (!virtio_queue_get_num(vdev, n)) {
1017            break;
1018        }
1019
1020        r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd);
1021        if (r < 0) {
1022            goto assign_error;
1023        }
1024    }
1025
1026    /* Must set vector notifier after guest notifier has been assigned */
1027    if ((with_irqfd || k->guest_notifier_mask) && assign) {
1028        if (with_irqfd) {
1029            proxy->vector_irqfd =
1030                g_malloc0(sizeof(*proxy->vector_irqfd) *
1031                          msix_nr_vectors_allocated(&proxy->pci_dev));
1032            r = kvm_virtio_pci_vector_use(proxy, nvqs);
1033            if (r < 0) {
1034                goto assign_error;
1035            }
1036        }
1037        r = msix_set_vector_notifiers(&proxy->pci_dev,
1038                                      virtio_pci_vector_unmask,
1039                                      virtio_pci_vector_mask,
1040                                      virtio_pci_vector_poll);
1041        if (r < 0) {
1042            goto notifiers_error;
1043        }
1044    }
1045
1046    return 0;
1047
1048notifiers_error:
1049    if (with_irqfd) {
1050        assert(assign);
1051        kvm_virtio_pci_vector_release(proxy, nvqs);
1052    }
1053
1054assign_error:
1055    /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
1056    assert(assign);
1057    while (--n >= 0) {
1058        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
1059    }
1060    return r;
1061}
1062
1063static int virtio_pci_set_host_notifier_mr(DeviceState *d, int n,
1064                                           MemoryRegion *mr, bool assign)
1065{
1066    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
1067    int offset;
1068
1069    if (n >= VIRTIO_QUEUE_MAX || !virtio_pci_modern(proxy) ||
1070        virtio_pci_queue_mem_mult(proxy) != memory_region_size(mr)) {
1071        return -1;
1072    }
1073
1074    if (assign) {
1075        offset = virtio_pci_queue_mem_mult(proxy) * n;
1076        memory_region_add_subregion_overlap(&proxy->notify.mr, offset, mr, 1);
1077    } else {
1078        memory_region_del_subregion(&proxy->notify.mr, mr);
1079    }
1080
1081    return 0;
1082}
1083
1084static void virtio_pci_vmstate_change(DeviceState *d, bool running)
1085{
1086    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
1087    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1088
1089    if (running) {
1090        /* Old QEMU versions did not set bus master enable on status write.
1091         * Detect DRIVER set and enable it.
1092         */
1093        if ((proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION) &&
1094            (vdev->status & VIRTIO_CONFIG_S_DRIVER) &&
1095            !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1096            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
1097                                     proxy->pci_dev.config[PCI_COMMAND] |
1098                                     PCI_COMMAND_MASTER, 1);
1099        }
1100        virtio_pci_start_ioeventfd(proxy);
1101    } else {
1102        virtio_pci_stop_ioeventfd(proxy);
1103    }
1104}
1105
1106/*
1107 * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
1108 */
1109
1110static int virtio_pci_query_nvectors(DeviceState *d)
1111{
1112    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1113
1114    return proxy->nvectors;
1115}
1116
1117static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
1118{
1119    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1120    PCIDevice *dev = &proxy->pci_dev;
1121
1122    return pci_get_address_space(dev);
1123}
1124
1125static bool virtio_pci_iommu_enabled(DeviceState *d)
1126{
1127    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1128    PCIDevice *dev = &proxy->pci_dev;
1129    AddressSpace *dma_as = pci_device_iommu_address_space(dev);
1130
1131    if (dma_as == &address_space_memory) {
1132        return false;
1133    }
1134
1135    return true;
1136}
1137
1138static bool virtio_pci_queue_enabled(DeviceState *d, int n)
1139{
1140    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1141    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1142
1143    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1144        return proxy->vqs[n].enabled;
1145    }
1146
1147    return virtio_queue_enabled_legacy(vdev, n);
1148}
1149
1150static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
1151                                   struct virtio_pci_cap *cap)
1152{
1153    PCIDevice *dev = &proxy->pci_dev;
1154    int offset;
1155
1156    offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
1157                                cap->cap_len, &error_abort);
1158
1159    assert(cap->cap_len >= sizeof *cap);
1160    memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
1161           cap->cap_len - PCI_CAP_FLAGS);
1162
1163    return offset;
1164}
1165
1166static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
1167                                       unsigned size)
1168{
1169    VirtIOPCIProxy *proxy = opaque;
1170    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1171    uint32_t val = 0;
1172    int i;
1173
1174    if (vdev == NULL) {
1175        return UINT64_MAX;
1176    }
1177
1178    switch (addr) {
1179    case VIRTIO_PCI_COMMON_DFSELECT:
1180        val = proxy->dfselect;
1181        break;
1182    case VIRTIO_PCI_COMMON_DF:
1183        if (proxy->dfselect <= 1) {
1184            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1185
1186            val = (vdev->host_features & ~vdc->legacy_features) >>
1187                (32 * proxy->dfselect);
1188        }
1189        break;
1190    case VIRTIO_PCI_COMMON_GFSELECT:
1191        val = proxy->gfselect;
1192        break;
1193    case VIRTIO_PCI_COMMON_GF:
1194        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
1195            val = proxy->guest_features[proxy->gfselect];
1196        }
1197        break;
1198    case VIRTIO_PCI_COMMON_MSIX:
1199        val = vdev->config_vector;
1200        break;
1201    case VIRTIO_PCI_COMMON_NUMQ:
1202        for (i = 0; i < VIRTIO_QUEUE_MAX; ++i) {
1203            if (virtio_queue_get_num(vdev, i)) {
1204                val = i + 1;
1205            }
1206        }
1207        break;
1208    case VIRTIO_PCI_COMMON_STATUS:
1209        val = vdev->status;
1210        break;
1211    case VIRTIO_PCI_COMMON_CFGGENERATION:
1212        val = vdev->generation;
1213        break;
1214    case VIRTIO_PCI_COMMON_Q_SELECT:
1215        val = vdev->queue_sel;
1216        break;
1217    case VIRTIO_PCI_COMMON_Q_SIZE:
1218        val = virtio_queue_get_num(vdev, vdev->queue_sel);
1219        break;
1220    case VIRTIO_PCI_COMMON_Q_MSIX:
1221        val = virtio_queue_vector(vdev, vdev->queue_sel);
1222        break;
1223    case VIRTIO_PCI_COMMON_Q_ENABLE:
1224        val = proxy->vqs[vdev->queue_sel].enabled;
1225        break;
1226    case VIRTIO_PCI_COMMON_Q_NOFF:
1227        /* Simply map queues in order */
1228        val = vdev->queue_sel;
1229        break;
1230    case VIRTIO_PCI_COMMON_Q_DESCLO:
1231        val = proxy->vqs[vdev->queue_sel].desc[0];
1232        break;
1233    case VIRTIO_PCI_COMMON_Q_DESCHI:
1234        val = proxy->vqs[vdev->queue_sel].desc[1];
1235        break;
1236    case VIRTIO_PCI_COMMON_Q_AVAILLO:
1237        val = proxy->vqs[vdev->queue_sel].avail[0];
1238        break;
1239    case VIRTIO_PCI_COMMON_Q_AVAILHI:
1240        val = proxy->vqs[vdev->queue_sel].avail[1];
1241        break;
1242    case VIRTIO_PCI_COMMON_Q_USEDLO:
1243        val = proxy->vqs[vdev->queue_sel].used[0];
1244        break;
1245    case VIRTIO_PCI_COMMON_Q_USEDHI:
1246        val = proxy->vqs[vdev->queue_sel].used[1];
1247        break;
1248    default:
1249        val = 0;
1250    }
1251
1252    return val;
1253}
1254
1255static void virtio_pci_common_write(void *opaque, hwaddr addr,
1256                                    uint64_t val, unsigned size)
1257{
1258    VirtIOPCIProxy *proxy = opaque;
1259    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1260
1261    if (vdev == NULL) {
1262        return;
1263    }
1264
1265    switch (addr) {
1266    case VIRTIO_PCI_COMMON_DFSELECT:
1267        proxy->dfselect = val;
1268        break;
1269    case VIRTIO_PCI_COMMON_GFSELECT:
1270        proxy->gfselect = val;
1271        break;
1272    case VIRTIO_PCI_COMMON_GF:
1273        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
1274            proxy->guest_features[proxy->gfselect] = val;
1275            virtio_set_features(vdev,
1276                                (((uint64_t)proxy->guest_features[1]) << 32) |
1277                                proxy->guest_features[0]);
1278        }
1279        break;
1280    case VIRTIO_PCI_COMMON_MSIX:
1281        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
1282        /* Make it possible for guest to discover an error took place. */
1283        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
1284            val = VIRTIO_NO_VECTOR;
1285        }
1286        vdev->config_vector = val;
1287        break;
1288    case VIRTIO_PCI_COMMON_STATUS:
1289        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1290            virtio_pci_stop_ioeventfd(proxy);
1291        }
1292
1293        virtio_set_status(vdev, val & 0xFF);
1294
1295        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
1296            virtio_pci_start_ioeventfd(proxy);
1297        }
1298
1299        if (vdev->status == 0) {
1300            virtio_pci_reset(DEVICE(proxy));
1301        }
1302
1303        break;
1304    case VIRTIO_PCI_COMMON_Q_SELECT:
1305        if (val < VIRTIO_QUEUE_MAX) {
1306            vdev->queue_sel = val;
1307        }
1308        break;
1309    case VIRTIO_PCI_COMMON_Q_SIZE:
1310        proxy->vqs[vdev->queue_sel].num = val;
1311        virtio_queue_set_num(vdev, vdev->queue_sel,
1312                             proxy->vqs[vdev->queue_sel].num);
1313        break;
1314    case VIRTIO_PCI_COMMON_Q_MSIX:
1315        msix_vector_unuse(&proxy->pci_dev,
1316                          virtio_queue_vector(vdev, vdev->queue_sel));
1317        /* Make it possible for guest to discover an error took place. */
1318        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
1319            val = VIRTIO_NO_VECTOR;
1320        }
1321        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
1322        break;
1323    case VIRTIO_PCI_COMMON_Q_ENABLE:
1324        if (val == 1) {
1325            virtio_queue_set_num(vdev, vdev->queue_sel,
1326                                 proxy->vqs[vdev->queue_sel].num);
1327            virtio_queue_set_rings(vdev, vdev->queue_sel,
1328                       ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
1329                       proxy->vqs[vdev->queue_sel].desc[0],
1330                       ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
1331                       proxy->vqs[vdev->queue_sel].avail[0],
1332                       ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
1333                       proxy->vqs[vdev->queue_sel].used[0]);
1334            proxy->vqs[vdev->queue_sel].enabled = 1;
1335        } else {
1336            virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val);
1337        }
1338        break;
1339    case VIRTIO_PCI_COMMON_Q_DESCLO:
1340        proxy->vqs[vdev->queue_sel].desc[0] = val;
1341        break;
1342    case VIRTIO_PCI_COMMON_Q_DESCHI:
1343        proxy->vqs[vdev->queue_sel].desc[1] = val;
1344        break;
1345    case VIRTIO_PCI_COMMON_Q_AVAILLO:
1346        proxy->vqs[vdev->queue_sel].avail[0] = val;
1347        break;
1348    case VIRTIO_PCI_COMMON_Q_AVAILHI:
1349        proxy->vqs[vdev->queue_sel].avail[1] = val;
1350        break;
1351    case VIRTIO_PCI_COMMON_Q_USEDLO:
1352        proxy->vqs[vdev->queue_sel].used[0] = val;
1353        break;
1354    case VIRTIO_PCI_COMMON_Q_USEDHI:
1355        proxy->vqs[vdev->queue_sel].used[1] = val;
1356        break;
1357    default:
1358        break;
1359    }
1360}
1361
1362
1363static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr,
1364                                       unsigned size)
1365{
1366    VirtIOPCIProxy *proxy = opaque;
1367    if (virtio_bus_get_device(&proxy->bus) == NULL) {
1368        return UINT64_MAX;
1369    }
1370
1371    return 0;
1372}
1373
1374static void virtio_pci_notify_write(void *opaque, hwaddr addr,
1375                                    uint64_t val, unsigned size)
1376{
1377    VirtIOPCIProxy *proxy = opaque;
1378    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1379
1380    unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);
1381
1382    if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
1383        virtio_queue_notify(vdev, queue);
1384    }
1385}
1386
1387static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
1388                                        uint64_t val, unsigned size)
1389{
1390    VirtIOPCIProxy *proxy = opaque;
1391    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1392
1393    unsigned queue = val;
1394
1395    if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
1396        virtio_queue_notify(vdev, queue);
1397    }
1398}
1399
1400static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
1401                                    unsigned size)
1402{
1403    VirtIOPCIProxy *proxy = opaque;
1404    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1405    uint64_t val;
1406
1407    if (vdev == NULL) {
1408        return UINT64_MAX;
1409    }
1410
1411    val = qatomic_xchg(&vdev->isr, 0);
1412    pci_irq_deassert(&proxy->pci_dev);
1413    return val;
1414}
1415
1416static void virtio_pci_isr_write(void *opaque, hwaddr addr,
1417                                 uint64_t val, unsigned size)
1418{
1419}
1420
1421static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr,
1422                                       unsigned size)
1423{
1424    VirtIOPCIProxy *proxy = opaque;
1425    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1426    uint64_t val;
1427
1428    if (vdev == NULL) {
1429        return UINT64_MAX;
1430    }
1431
1432    switch (size) {
1433    case 1:
1434        val = virtio_config_modern_readb(vdev, addr);
1435        break;
1436    case 2:
1437        val = virtio_config_modern_readw(vdev, addr);
1438        break;
1439    case 4:
1440        val = virtio_config_modern_readl(vdev, addr);
1441        break;
1442    default:
1443        val = 0;
1444        break;
1445    }
1446    return val;
1447}
1448
1449static void virtio_pci_device_write(void *opaque, hwaddr addr,
1450                                    uint64_t val, unsigned size)
1451{
1452    VirtIOPCIProxy *proxy = opaque;
1453    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1454
1455    if (vdev == NULL) {
1456        return;
1457    }
1458
1459    switch (size) {
1460    case 1:
1461        virtio_config_modern_writeb(vdev, addr, val);
1462        break;
1463    case 2:
1464        virtio_config_modern_writew(vdev, addr, val);
1465        break;
1466    case 4:
1467        virtio_config_modern_writel(vdev, addr, val);
1468        break;
1469    }
1470}
1471
1472static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
1473                                           const char *vdev_name)
1474{
1475    static const MemoryRegionOps common_ops = {
1476        .read = virtio_pci_common_read,
1477        .write = virtio_pci_common_write,
1478        .impl = {
1479            .min_access_size = 1,
1480            .max_access_size = 4,
1481        },
1482        .endianness = DEVICE_LITTLE_ENDIAN,
1483    };
1484    static const MemoryRegionOps isr_ops = {
1485        .read = virtio_pci_isr_read,
1486        .write = virtio_pci_isr_write,
1487        .impl = {
1488            .min_access_size = 1,
1489            .max_access_size = 4,
1490        },
1491        .endianness = DEVICE_LITTLE_ENDIAN,
1492    };
1493    static const MemoryRegionOps device_ops = {
1494        .read = virtio_pci_device_read,
1495        .write = virtio_pci_device_write,
1496        .impl = {
1497            .min_access_size = 1,
1498            .max_access_size = 4,
1499        },
1500        .endianness = DEVICE_LITTLE_ENDIAN,
1501    };
1502    static const MemoryRegionOps notify_ops = {
1503        .read = virtio_pci_notify_read,
1504        .write = virtio_pci_notify_write,
1505        .impl = {
1506            .min_access_size = 1,
1507            .max_access_size = 4,
1508        },
1509        .endianness = DEVICE_LITTLE_ENDIAN,
1510    };
1511    static const MemoryRegionOps notify_pio_ops = {
1512        .read = virtio_pci_notify_read,
1513        .write = virtio_pci_notify_write_pio,
1514        .impl = {
1515            .min_access_size = 1,
1516            .max_access_size = 4,
1517        },
1518        .endianness = DEVICE_LITTLE_ENDIAN,
1519    };
1520    g_autoptr(GString) name = g_string_new(NULL);
1521
1522    g_string_printf(name, "virtio-pci-common-%s", vdev_name);
1523    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
1524                          &common_ops,
1525                          proxy,
1526                          name->str,
1527                          proxy->common.size);
1528
1529    g_string_printf(name, "virtio-pci-isr-%s", vdev_name);
1530    memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
1531                          &isr_ops,
1532                          proxy,
1533                          name->str,
1534                          proxy->isr.size);
1535
1536    g_string_printf(name, "virtio-pci-device-%s", vdev_name);
1537    memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
1538                          &device_ops,
1539                          proxy,
1540                          name->str,
1541                          proxy->device.size);
1542
1543    g_string_printf(name, "virtio-pci-notify-%s", vdev_name);
1544    memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
1545                          &notify_ops,
1546                          proxy,
1547                          name->str,
1548                          proxy->notify.size);
1549
1550    g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name);
1551    memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
1552                          &notify_pio_ops,
1553                          proxy,
1554                          name->str,
1555                          proxy->notify_pio.size);
1556}
1557
1558static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy,
1559                                         VirtIOPCIRegion *region,
1560                                         struct virtio_pci_cap *cap,
1561                                         MemoryRegion *mr,
1562                                         uint8_t bar)
1563{
1564    memory_region_add_subregion(mr, region->offset, &region->mr);
1565
1566    cap->cfg_type = region->type;
1567    cap->bar = bar;
1568    cap->offset = cpu_to_le32(region->offset);
1569    cap->length = cpu_to_le32(region->size);
1570    virtio_pci_add_mem_cap(proxy, cap);
1571
1572}
1573
1574static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy,
1575                                             VirtIOPCIRegion *region,
1576                                             struct virtio_pci_cap *cap)
1577{
1578    virtio_pci_modern_region_map(proxy, region, cap,
1579                                 &proxy->modern_bar, proxy->modern_mem_bar_idx);
1580}
1581
1582static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy,
1583                                            VirtIOPCIRegion *region,
1584                                            struct virtio_pci_cap *cap)
1585{
1586    virtio_pci_modern_region_map(proxy, region, cap,
1587                                 &proxy->io_bar, proxy->modern_io_bar_idx);
1588}
1589
1590static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy,
1591                                               VirtIOPCIRegion *region)
1592{
1593    memory_region_del_subregion(&proxy->modern_bar,
1594                                &region->mr);
1595}
1596
1597static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy,
1598                                              VirtIOPCIRegion *region)
1599{
1600    memory_region_del_subregion(&proxy->io_bar,
1601                                &region->mr);
1602}
1603
1604static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
1605{
1606    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1607    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1608
1609    if (virtio_pci_modern(proxy)) {
1610        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
1611    }
1612
1613    virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE);
1614}
1615
1616/* This is called by virtio-bus just after the device is plugged. */
1617static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
1618{
1619    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1620    VirtioBusState *bus = &proxy->bus;
1621    bool legacy = virtio_pci_legacy(proxy);
1622    bool modern;
1623    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
1624    uint8_t *config;
1625    uint32_t size;
1626    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1627
1628    /*
1629     * Virtio capabilities present without
1630     * VIRTIO_F_VERSION_1 confuses guests
1631     */
1632    if (!proxy->ignore_backend_features &&
1633            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
1634        virtio_pci_disable_modern(proxy);
1635
1636        if (!legacy) {
1637            error_setg(errp, "Device doesn't support modern mode, and legacy"
1638                             " mode is disabled");
1639            error_append_hint(errp, "Set disable-legacy to off\n");
1640
1641            return;
1642        }
1643    }
1644
1645    modern = virtio_pci_modern(proxy);
1646
1647    config = proxy->pci_dev.config;
1648    if (proxy->class_code) {
1649        pci_config_set_class(config, proxy->class_code);
1650    }
1651
1652    if (legacy) {
1653        if (!virtio_legacy_allowed(vdev)) {
1654            /*
1655             * To avoid migration issues, we allow legacy mode when legacy
1656             * check is disabled in the old machine types (< 5.1).
1657             */
1658            if (virtio_legacy_check_disabled(vdev)) {
1659                warn_report("device is modern-only, but for backward "
1660                            "compatibility legacy is allowed");
1661            } else {
1662                error_setg(errp,
1663                           "device is modern-only, use disable-legacy=on");
1664                return;
1665            }
1666        }
1667        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1668            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
1669                       " neither legacy nor transitional device");
1670            return ;
1671        }
1672        /*
1673         * Legacy and transitional devices use specific subsystem IDs.
1674         * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID)
1675         * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default.
1676         */
1677        pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
1678    } else {
1679        /* pure virtio-1.0 */
1680        pci_set_word(config + PCI_VENDOR_ID,
1681                     PCI_VENDOR_ID_REDHAT_QUMRANET);
1682        pci_set_word(config + PCI_DEVICE_ID,
1683                     0x1040 + virtio_bus_get_vdev_id(bus));
1684        pci_config_set_revision(config, 1);
1685    }
1686    config[PCI_INTERRUPT_PIN] = 1;
1687
1688
1689    if (modern) {
1690        struct virtio_pci_cap cap = {
1691            .cap_len = sizeof cap,
1692        };
1693        struct virtio_pci_notify_cap notify = {
1694            .cap.cap_len = sizeof notify,
1695            .notify_off_multiplier =
1696                cpu_to_le32(virtio_pci_queue_mem_mult(proxy)),
1697        };
1698        struct virtio_pci_cfg_cap cfg = {
1699            .cap.cap_len = sizeof cfg,
1700            .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG,
1701        };
1702        struct virtio_pci_notify_cap notify_pio = {
1703            .cap.cap_len = sizeof notify,
1704            .notify_off_multiplier = cpu_to_le32(0x0),
1705        };
1706
1707        struct virtio_pci_cfg_cap *cfg_mask;
1708
1709        virtio_pci_modern_regions_init(proxy, vdev->name);
1710
1711        virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
1712        virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
1713        virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
1714        virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);
1715
1716        if (modern_pio) {
1717            memory_region_init(&proxy->io_bar, OBJECT(proxy),
1718                               "virtio-pci-io", 0x4);
1719
1720            pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
1721                             PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
1722
1723            virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
1724                                            &notify_pio.cap);
1725        }
1726
1727        pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
1728                         PCI_BASE_ADDRESS_SPACE_MEMORY |
1729                         PCI_BASE_ADDRESS_MEM_PREFETCH |
1730                         PCI_BASE_ADDRESS_MEM_TYPE_64,
1731                         &proxy->modern_bar);
1732
1733        proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
1734        cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
1735        pci_set_byte(&cfg_mask->cap.bar, ~0x0);
1736        pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0);
1737        pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0);
1738        pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
1739    }
1740
1741    if (proxy->nvectors) {
1742        int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
1743                                          proxy->msix_bar_idx, NULL);
1744        if (err) {
1745            /* Notice when a system that supports MSIx can't initialize it */
1746            if (err != -ENOTSUP) {
1747                warn_report("unable to init msix vectors to %" PRIu32,
1748                            proxy->nvectors);
1749            }
1750            proxy->nvectors = 0;
1751        }
1752    }
1753
1754    proxy->pci_dev.config_write = virtio_write_config;
1755    proxy->pci_dev.config_read = virtio_read_config;
1756
1757    if (legacy) {
1758        size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
1759            + virtio_bus_get_vdev_config_len(bus);
1760        size = pow2ceil(size);
1761
1762        memory_region_init_io(&proxy->bar, OBJECT(proxy),
1763                              &virtio_pci_config_ops,
1764                              proxy, "virtio-pci", size);
1765
1766        pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
1767                         PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
1768    }
1769}
1770
1771static void virtio_pci_device_unplugged(DeviceState *d)
1772{
1773    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1774    bool modern = virtio_pci_modern(proxy);
1775    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
1776
1777    virtio_pci_stop_ioeventfd(proxy);
1778
1779    if (modern) {
1780        virtio_pci_modern_mem_region_unmap(proxy, &proxy->common);
1781        virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr);
1782        virtio_pci_modern_mem_region_unmap(proxy, &proxy->device);
1783        virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify);
1784        if (modern_pio) {
1785            virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
1786        }
1787    }
1788}
1789
1790static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
1791{
1792    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
1793    VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev);
1794    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
1795                     !pci_bus_is_root(pci_get_bus(pci_dev));
1796
1797    if (kvm_enabled() && !kvm_has_many_ioeventfds()) {
1798        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
1799    }
1800
1801    /* fd-based ioevents can't be synchronized in record/replay */
1802    if (replay_mode != REPLAY_MODE_NONE) {
1803        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
1804    }
1805
1806    /*
1807     * virtio pci bar layout used by default.
1808     * subclasses can re-arrange things if needed.
1809     *
1810     *   region 0   --  virtio legacy io bar
1811     *   region 1   --  msi-x bar
1812     *   region 2   --  virtio modern io bar (off by default)
1813     *   region 4+5 --  virtio modern memory (64bit) bar
1814     *
1815     */
1816    proxy->legacy_io_bar_idx  = 0;
1817    proxy->msix_bar_idx       = 1;
1818    proxy->modern_io_bar_idx  = 2;
1819    proxy->modern_mem_bar_idx = 4;
1820
1821    proxy->common.offset = 0x0;
1822    proxy->common.size = 0x1000;
1823    proxy->common.type = VIRTIO_PCI_CAP_COMMON_CFG;
1824
1825    proxy->isr.offset = 0x1000;
1826    proxy->isr.size = 0x1000;
1827    proxy->isr.type = VIRTIO_PCI_CAP_ISR_CFG;
1828
1829    proxy->device.offset = 0x2000;
1830    proxy->device.size = 0x1000;
1831    proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG;
1832
1833    proxy->notify.offset = 0x3000;
1834    proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX;
1835    proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG;
1836
1837    proxy->notify_pio.offset = 0x0;
1838    proxy->notify_pio.size = 0x4;
1839    proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG;
1840
1841    /* subclasses can enforce modern, so do this unconditionally */
1842    memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci",
1843                       /* PCI BAR regions must be powers of 2 */
1844                       pow2ceil(proxy->notify.offset + proxy->notify.size));
1845
1846    if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
1847        proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
1848    }
1849
1850    if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) {
1851        error_setg(errp, "device cannot work as neither modern nor legacy mode"
1852                   " is enabled");
1853        error_append_hint(errp, "Set either disable-modern or disable-legacy"
1854                          " to off\n");
1855        return;
1856    }
1857
1858    if (pcie_port && pci_is_express(pci_dev)) {
1859        int pos;
1860        uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;
1861
1862        pos = pcie_endpoint_cap_init(pci_dev, 0);
1863        assert(pos > 0);
1864
1865        pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
1866                                 PCI_PM_SIZEOF, errp);
1867        if (pos < 0) {
1868            return;
1869        }
1870
1871        pci_dev->exp.pm_cap = pos;
1872
1873        /*
1874         * Indicates that this function complies with revision 1.2 of the
1875         * PCI Power Management Interface Specification.
1876         */
1877        pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
1878
1879        if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
1880            pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset,
1881                          PCI_ERR_SIZEOF, NULL);
1882            last_pcie_cap_offset += PCI_ERR_SIZEOF;
1883        }
1884
1885        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
1886            /* Init error enabling flags */
1887            pcie_cap_deverr_init(pci_dev);
1888        }
1889
1890        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_LNKCTL) {
1891            /* Init Link Control Register */
1892            pcie_cap_lnkctl_init(pci_dev);
1893        }
1894
1895        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
1896            /* Init Power Management Control Register */
1897            pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
1898                         PCI_PM_CTRL_STATE_MASK);
1899        }
1900
1901        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
1902            pcie_ats_init(pci_dev, last_pcie_cap_offset,
1903                          proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
1904            last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
1905        }
1906
1907        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
1908            /* Set Function Level Reset capability bit */
1909            pcie_cap_flr_init(pci_dev);
1910        }
1911    } else {
1912        /*
1913         * make future invocations of pci_is_express() return false
1914         * and pci_config_size() return PCI_CONFIG_SPACE_SIZE.
1915         */
1916        pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
1917    }
1918
1919    virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
1920    if (k->realize) {
1921        k->realize(proxy, errp);
1922    }
1923}
1924
1925static void virtio_pci_exit(PCIDevice *pci_dev)
1926{
1927    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
1928    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
1929                     !pci_bus_is_root(pci_get_bus(pci_dev));
1930
1931    msix_uninit_exclusive_bar(pci_dev);
1932    if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port &&
1933        pci_is_express(pci_dev)) {
1934        pcie_aer_exit(pci_dev);
1935    }
1936}
1937
1938static void virtio_pci_reset(DeviceState *qdev)
1939{
1940    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
1941    VirtioBusState *bus = VIRTIO_BUS(&proxy->bus);
1942    PCIDevice *dev = PCI_DEVICE(qdev);
1943    int i;
1944
1945    virtio_pci_stop_ioeventfd(proxy);
1946    virtio_bus_reset(bus);
1947    msix_unuse_all_vectors(&proxy->pci_dev);
1948
1949    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1950        proxy->vqs[i].enabled = 0;
1951        proxy->vqs[i].num = 0;
1952        proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
1953        proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
1954        proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
1955    }
1956
1957    if (pci_is_express(dev)) {
1958        pcie_cap_deverr_reset(dev);
1959        pcie_cap_lnkctl_reset(dev);
1960
1961        pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0);
1962    }
1963}
1964
1965static Property virtio_pci_properties[] = {
1966    DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
1967                    VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
1968    DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
1969                    VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
1970    DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
1971                    VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
1972    DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
1973                    VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
1974    DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
1975                    VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
1976    DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
1977                     ignore_backend_features, false),
1978    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
1979                    VIRTIO_PCI_FLAG_ATS_BIT, false),
1980    DEFINE_PROP_BIT("x-ats-page-aligned", VirtIOPCIProxy, flags,
1981                    VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, true),
1982    DEFINE_PROP_BIT("x-pcie-deverr-init", VirtIOPCIProxy, flags,
1983                    VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, true),
1984    DEFINE_PROP_BIT("x-pcie-lnkctl-init", VirtIOPCIProxy, flags,
1985                    VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true),
1986    DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags,
1987                    VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
1988    DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
1989                    VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
1990    DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags,
1991                    VIRTIO_PCI_FLAG_AER_BIT, false),
1992    DEFINE_PROP_END_OF_LIST(),
1993};
1994
1995static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
1996{
1997    VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev);
1998    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
1999    PCIDevice *pci_dev = &proxy->pci_dev;
2000
2001    if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
2002        virtio_pci_modern(proxy)) {
2003        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
2004    }
2005
2006    vpciklass->parent_dc_realize(qdev, errp);
2007}
2008
2009static void virtio_pci_class_init(ObjectClass *klass, void *data)
2010{
2011    DeviceClass *dc = DEVICE_CLASS(klass);
2012    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
2013    VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
2014
2015    device_class_set_props(dc, virtio_pci_properties);
2016    k->realize = virtio_pci_realize;
2017    k->exit = virtio_pci_exit;
2018    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
2019    k->revision = VIRTIO_PCI_ABI_VERSION;
2020    k->class_id = PCI_CLASS_OTHERS;
2021    device_class_set_parent_realize(dc, virtio_pci_dc_realize,
2022                                    &vpciklass->parent_dc_realize);
2023    dc->reset = virtio_pci_reset;
2024}
2025
2026static const TypeInfo virtio_pci_info = {
2027    .name          = TYPE_VIRTIO_PCI,
2028    .parent        = TYPE_PCI_DEVICE,
2029    .instance_size = sizeof(VirtIOPCIProxy),
2030    .class_init    = virtio_pci_class_init,
2031    .class_size    = sizeof(VirtioPCIClass),
2032    .abstract      = true,
2033};
2034
2035static Property virtio_pci_generic_properties[] = {
2036    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
2037                            ON_OFF_AUTO_AUTO),
2038    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
2039    DEFINE_PROP_END_OF_LIST(),
2040};
2041
2042static void virtio_pci_base_class_init(ObjectClass *klass, void *data)
2043{
2044    const VirtioPCIDeviceTypeInfo *t = data;
2045    if (t->class_init) {
2046        t->class_init(klass, NULL);
2047    }
2048}
2049
2050static void virtio_pci_generic_class_init(ObjectClass *klass, void *data)
2051{
2052    DeviceClass *dc = DEVICE_CLASS(klass);
2053
2054    device_class_set_props(dc, virtio_pci_generic_properties);
2055}
2056
2057static void virtio_pci_transitional_instance_init(Object *obj)
2058{
2059    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
2060
2061    proxy->disable_legacy = ON_OFF_AUTO_OFF;
2062    proxy->disable_modern = false;
2063}
2064
2065static void virtio_pci_non_transitional_instance_init(Object *obj)
2066{
2067    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
2068
2069    proxy->disable_legacy = ON_OFF_AUTO_ON;
2070    proxy->disable_modern = false;
2071}
2072
2073void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
2074{
2075    char *base_name = NULL;
2076    TypeInfo base_type_info = {
2077        .name          = t->base_name,
2078        .parent        = t->parent ? t->parent : TYPE_VIRTIO_PCI,
2079        .instance_size = t->instance_size,
2080        .instance_init = t->instance_init,
2081        .class_size    = t->class_size,
2082        .abstract      = true,
2083        .interfaces    = t->interfaces,
2084    };
2085    TypeInfo generic_type_info = {
2086        .name = t->generic_name,
2087        .parent = base_type_info.name,
2088        .class_init = virtio_pci_generic_class_init,
2089        .interfaces = (InterfaceInfo[]) {
2090            { INTERFACE_PCIE_DEVICE },
2091            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
2092            { }
2093        },
2094    };
2095
2096    if (!base_type_info.name) {
2097        /* No base type -> register a single generic device type */
2098        /* use intermediate %s-base-type to add generic device props */
2099        base_name = g_strdup_printf("%s-base-type", t->generic_name);
2100        base_type_info.name = base_name;
2101        base_type_info.class_init = virtio_pci_generic_class_init;
2102
2103        generic_type_info.parent = base_name;
2104        generic_type_info.class_init = virtio_pci_base_class_init;
2105        generic_type_info.class_data = (void *)t;
2106
2107        assert(!t->non_transitional_name);
2108        assert(!t->transitional_name);
2109    } else {
2110        base_type_info.class_init = virtio_pci_base_class_init;
2111        base_type_info.class_data = (void *)t;
2112    }
2113
2114    type_register(&base_type_info);
2115    if (generic_type_info.name) {
2116        type_register(&generic_type_info);
2117    }
2118
2119    if (t->non_transitional_name) {
2120        const TypeInfo non_transitional_type_info = {
2121            .name          = t->non_transitional_name,
2122            .parent        = base_type_info.name,
2123            .instance_init = virtio_pci_non_transitional_instance_init,
2124            .interfaces = (InterfaceInfo[]) {
2125                { INTERFACE_PCIE_DEVICE },
2126                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
2127                { }
2128            },
2129        };
2130        type_register(&non_transitional_type_info);
2131    }
2132
2133    if (t->transitional_name) {
2134        const TypeInfo transitional_type_info = {
2135            .name          = t->transitional_name,
2136            .parent        = base_type_info.name,
2137            .instance_init = virtio_pci_transitional_instance_init,
2138            .interfaces = (InterfaceInfo[]) {
2139                /*
2140                 * Transitional virtio devices work only as Conventional PCI
2141                 * devices because they require PIO ports.
2142                 */
2143                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
2144                { }
2145            },
2146        };
2147        type_register(&transitional_type_info);
2148    }
2149    g_free(base_name);
2150}
2151
2152unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues)
2153{
2154    /*
2155     * 1:1 vq to vCPU mapping is ideal because the same vCPU that submitted
2156     * virtqueue buffers can handle their completion. When a different vCPU
2157     * handles completion it may need to IPI the vCPU that submitted the
2158     * request and this adds overhead.
2159     *
2160     * Virtqueues consume guest RAM and MSI-X vectors. This is wasteful in
2161     * guests with very many vCPUs and a device that is only used by a few
2162     * vCPUs. Unfortunately optimizing that case requires manual pinning inside
2163     * the guest, so those users might as well manually set the number of
2164     * queues. There is no upper limit that can be applied automatically and
2165     * doing so arbitrarily would result in a sudden performance drop once the
2166     * threshold number of vCPUs is exceeded.
2167     */
2168    unsigned num_queues = current_machine->smp.cpus;
2169
2170    /*
2171     * The maximum number of MSI-X vectors is PCI_MSIX_FLAGS_QSIZE + 1, but the
2172     * config change interrupt and the fixed virtqueues must be taken into
2173     * account too.
2174     */
2175    num_queues = MIN(num_queues, PCI_MSIX_FLAGS_QSIZE - fixed_queues);
2176
2177    /*
2178     * There is a limit to how many virtqueues a device can have.
2179     */
2180    return MIN(num_queues, VIRTIO_QUEUE_MAX - fixed_queues);
2181}
2182
2183/* virtio-pci-bus */
2184
2185static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
2186                               VirtIOPCIProxy *dev)
2187{
2188    DeviceState *qdev = DEVICE(dev);
2189    char virtio_bus_name[] = "virtio-bus";
2190
2191    qbus_init(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev, virtio_bus_name);
2192}
2193
2194static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
2195{
2196    BusClass *bus_class = BUS_CLASS(klass);
2197    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
2198    bus_class->max_dev = 1;
2199    k->notify = virtio_pci_notify;
2200    k->save_config = virtio_pci_save_config;
2201    k->load_config = virtio_pci_load_config;
2202    k->save_queue = virtio_pci_save_queue;
2203    k->load_queue = virtio_pci_load_queue;
2204    k->save_extra_state = virtio_pci_save_extra_state;
2205    k->load_extra_state = virtio_pci_load_extra_state;
2206    k->has_extra_state = virtio_pci_has_extra_state;
2207    k->query_guest_notifiers = virtio_pci_query_guest_notifiers;
2208    k->set_guest_notifiers = virtio_pci_set_guest_notifiers;
2209    k->set_host_notifier_mr = virtio_pci_set_host_notifier_mr;
2210    k->vmstate_change = virtio_pci_vmstate_change;
2211    k->pre_plugged = virtio_pci_pre_plugged;
2212    k->device_plugged = virtio_pci_device_plugged;
2213    k->device_unplugged = virtio_pci_device_unplugged;
2214    k->query_nvectors = virtio_pci_query_nvectors;
2215    k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
2216    k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
2217    k->get_dma_as = virtio_pci_get_dma_as;
2218    k->iommu_enabled = virtio_pci_iommu_enabled;
2219    k->queue_enabled = virtio_pci_queue_enabled;
2220}
2221
2222static const TypeInfo virtio_pci_bus_info = {
2223    .name          = TYPE_VIRTIO_PCI_BUS,
2224    .parent        = TYPE_VIRTIO_BUS,
2225    .instance_size = sizeof(VirtioPCIBusState),
2226    .class_size    = sizeof(VirtioPCIBusClass),
2227    .class_init    = virtio_pci_bus_class_init,
2228};
2229
2230static void virtio_pci_register_types(void)
2231{
2232    /* Base types: */
2233    type_register_static(&virtio_pci_bus_info);
2234    type_register_static(&virtio_pci_info);
2235}
2236
2237type_init(virtio_pci_register_types)
2238
2239