qemu/hw/virtio/virtio-pci.c
<<
>>
Prefs
   1/*
   2 * Virtio PCI Bindings
   3 *
   4 * Copyright IBM, Corp. 2007
   5 * Copyright (c) 2009 CodeSourcery
   6 *
   7 * Authors:
   8 *  Anthony Liguori   <aliguori@us.ibm.com>
   9 *  Paul Brook        <paul@codesourcery.com>
  10 *
  11 * This work is licensed under the terms of the GNU GPL, version 2.  See
  12 * the COPYING file in the top-level directory.
  13 *
  14 * Contributions after 2012-01-13 are licensed under the terms of the
  15 * GNU GPL, version 2 or (at your option) any later version.
  16 */
  17
  18#include "qemu/osdep.h"
  19
  20#include "exec/memop.h"
  21#include "standard-headers/linux/virtio_pci.h"
  22#include "hw/boards.h"
  23#include "hw/virtio/virtio.h"
  24#include "migration/qemu-file-types.h"
  25#include "hw/pci/pci.h"
  26#include "hw/pci/pci_bus.h"
  27#include "hw/qdev-properties.h"
  28#include "qapi/error.h"
  29#include "qemu/error-report.h"
  30#include "qemu/log.h"
  31#include "qemu/module.h"
  32#include "hw/pci/msi.h"
  33#include "hw/pci/msix.h"
  34#include "hw/loader.h"
  35#include "sysemu/kvm.h"
  36#include "virtio-pci.h"
  37#include "qemu/range.h"
  38#include "hw/virtio/virtio-bus.h"
  39#include "qapi/visitor.h"
  40
/*
 * VIRTIO_PCI_CONFIG_OFF() evaluated according to whether the device has
 * MSI-X present (as opposed to currently enabled).
 */
#define VIRTIO_PCI_REGION_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_present(dev))

/* Drop any VIRTIO_PCI_CONFIG definition inherited from the included headers. */
#undef VIRTIO_PCI_CONFIG

/* The remaining space is defined by each driver as the per-driver
 * configuration space */
/*
 * Unlike VIRTIO_PCI_REGION_SIZE(), this one depends on whether MSI-X is
 * currently enabled, so its value can change at run time.
 */
#define VIRTIO_PCI_CONFIG_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))

/* Forward declarations for helpers defined later in this file. */
static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
                               VirtIOPCIProxy *dev);
static void virtio_pci_reset(DeviceState *qdev);
  52
  53/* virtio device */
  54/* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */
  55static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
  56{
  57    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
  58}
  59
  60/* DeviceState to VirtIOPCIProxy. Note: used on datapath,
  61 * be careful and test performance if you change this.
  62 */
  63static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
  64{
  65    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
  66}
  67
  68static void virtio_pci_notify(DeviceState *d, uint16_t vector)
  69{
  70    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
  71
  72    if (msix_enabled(&proxy->pci_dev))
  73        msix_notify(&proxy->pci_dev, vector);
  74    else {
  75        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
  76        pci_set_irq(&proxy->pci_dev, qatomic_read(&vdev->isr) & 1);
  77    }
  78}
  79
  80static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
  81{
  82    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
  83    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
  84
  85    pci_device_save(&proxy->pci_dev, f);
  86    msix_save(&proxy->pci_dev, f);
  87    if (msix_present(&proxy->pci_dev))
  88        qemu_put_be16(f, vdev->config_vector);
  89}
  90
/*
 * Migration description of a single VirtIOPCIQueue entry: the queue size,
 * its enabled flag and the desc/avail/used values, each kept as a pair of
 * 32-bit words.
 */
static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
    .name = "virtio_pci/modern_queue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(num, VirtIOPCIQueue),
        VMSTATE_UNUSED(1), /* enabled was stored as be16 */
        VMSTATE_BOOL(enabled, VirtIOPCIQueue),
        VMSTATE_UINT32_ARRAY(desc, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(avail, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(used, VirtIOPCIQueue, 2),
        VMSTATE_END_OF_LIST()
    }
};
 105
 106static bool virtio_pci_modern_state_needed(void *opaque)
 107{
 108    VirtIOPCIProxy *proxy = opaque;
 109
 110    return virtio_pci_modern(proxy);
 111}
 112
/*
 * Optional subsection with the proxy's modern-mode state: the two feature
 * selectors, the guest feature words and the per-queue state for all
 * VIRTIO_QUEUE_MAX queues.  Included only when
 * virtio_pci_modern_state_needed() returns true.
 */
static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
    .name = "virtio_pci/modern_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_pci_modern_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
        VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
        VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
                             vmstate_virtio_pci_modern_queue_state,
                             VirtIOPCIQueue),
        VMSTATE_END_OF_LIST()
    }
};
 128
/*
 * Top-level description used for the proxy's "extra state".  It has no
 * fields of its own; everything lives in the modern-state subsection.
 */
static const VMStateDescription vmstate_virtio_pci = {
    .name = "virtio_pci",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_pci_modern_state_sub,
        NULL
    }
};
 142
 143static bool virtio_pci_has_extra_state(DeviceState *d)
 144{
 145    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 146
 147    return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA;
 148}
 149
 150static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
 151{
 152    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 153
 154    vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL);
 155}
 156
 157static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
 158{
 159    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 160
 161    return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1);
 162}
 163
 164static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
 165{
 166    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 167    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 168
 169    if (msix_present(&proxy->pci_dev))
 170        qemu_put_be16(f, virtio_queue_vector(vdev, n));
 171}
 172
 173static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
 174{
 175    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 176    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 177
 178    int ret;
 179    ret = pci_device_load(&proxy->pci_dev, f);
 180    if (ret) {
 181        return ret;
 182    }
 183    msix_unuse_all_vectors(&proxy->pci_dev);
 184    msix_load(&proxy->pci_dev, f);
 185    if (msix_present(&proxy->pci_dev)) {
 186        qemu_get_be16s(f, &vdev->config_vector);
 187    } else {
 188        vdev->config_vector = VIRTIO_NO_VECTOR;
 189    }
 190    if (vdev->config_vector != VIRTIO_NO_VECTOR) {
 191        return msix_vector_use(&proxy->pci_dev, vdev->config_vector);
 192    }
 193    return 0;
 194}
 195
 196static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
 197{
 198    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 199    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 200
 201    uint16_t vector;
 202    if (msix_present(&proxy->pci_dev)) {
 203        qemu_get_be16s(f, &vector);
 204    } else {
 205        vector = VIRTIO_NO_VECTOR;
 206    }
 207    virtio_queue_set_vector(vdev, n, vector);
 208    if (vector != VIRTIO_NO_VECTOR) {
 209        return msix_vector_use(&proxy->pci_dev, vector);
 210    }
 211
 212    return 0;
 213}
 214
 215static bool virtio_pci_ioeventfd_enabled(DeviceState *d)
 216{
 217    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 218
 219    return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0;
 220}
 221
 222#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000
 223
 224static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy)
 225{
 226    return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ?
 227        QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4;
 228}
 229
 230static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier,
 231                                       int n, bool assign)
 232{
 233    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 234    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 235    VirtQueue *vq = virtio_get_queue(vdev, n);
 236    bool legacy = virtio_pci_legacy(proxy);
 237    bool modern = virtio_pci_modern(proxy);
 238    bool fast_mmio = kvm_ioeventfd_any_length_enabled();
 239    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
 240    MemoryRegion *modern_mr = &proxy->notify.mr;
 241    MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr;
 242    MemoryRegion *legacy_mr = &proxy->bar;
 243    hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) *
 244                         virtio_get_queue_index(vq);
 245    hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY;
 246
 247    if (assign) {
 248        if (modern) {
 249            if (fast_mmio) {
 250                memory_region_add_eventfd(modern_mr, modern_addr, 0,
 251                                          false, n, notifier);
 252            } else {
 253                memory_region_add_eventfd(modern_mr, modern_addr, 2,
 254                                          false, n, notifier);
 255            }
 256            if (modern_pio) {
 257                memory_region_add_eventfd(modern_notify_mr, 0, 2,
 258                                              true, n, notifier);
 259            }
 260        }
 261        if (legacy) {
 262            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
 263                                      true, n, notifier);
 264        }
 265    } else {
 266        if (modern) {
 267            if (fast_mmio) {
 268                memory_region_del_eventfd(modern_mr, modern_addr, 0,
 269                                          false, n, notifier);
 270            } else {
 271                memory_region_del_eventfd(modern_mr, modern_addr, 2,
 272                                          false, n, notifier);
 273            }
 274            if (modern_pio) {
 275                memory_region_del_eventfd(modern_notify_mr, 0, 2,
 276                                          true, n, notifier);
 277            }
 278        }
 279        if (legacy) {
 280            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
 281                                      true, n, notifier);
 282        }
 283    }
 284    return 0;
 285}
 286
 287static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
 288{
 289    virtio_bus_start_ioeventfd(&proxy->bus);
 290}
 291
 292static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
 293{
 294    virtio_bus_stop_ioeventfd(&proxy->bus);
 295}
 296
 297static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 298{
 299    VirtIOPCIProxy *proxy = opaque;
 300    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 301    hwaddr pa;
 302
 303    switch (addr) {
 304    case VIRTIO_PCI_GUEST_FEATURES:
 305        /* Guest does not negotiate properly?  We have to assume nothing. */
 306        if (val & (1 << VIRTIO_F_BAD_FEATURE)) {
 307            val = virtio_bus_get_vdev_bad_features(&proxy->bus);
 308        }
 309        virtio_set_features(vdev, val);
 310        break;
 311    case VIRTIO_PCI_QUEUE_PFN:
 312        pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
 313        if (pa == 0) {
 314            virtio_pci_reset(DEVICE(proxy));
 315        }
 316        else
 317            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
 318        break;
 319    case VIRTIO_PCI_QUEUE_SEL:
 320        if (val < VIRTIO_QUEUE_MAX)
 321            vdev->queue_sel = val;
 322        break;
 323    case VIRTIO_PCI_QUEUE_NOTIFY:
 324        if (val < VIRTIO_QUEUE_MAX) {
 325            virtio_queue_notify(vdev, val);
 326        }
 327        break;
 328    case VIRTIO_PCI_STATUS:
 329        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
 330            virtio_pci_stop_ioeventfd(proxy);
 331        }
 332
 333        virtio_set_status(vdev, val & 0xFF);
 334
 335        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
 336            virtio_pci_start_ioeventfd(proxy);
 337        }
 338
 339        if (vdev->status == 0) {
 340            virtio_pci_reset(DEVICE(proxy));
 341        }
 342
 343        /* Linux before 2.6.34 drives the device without enabling
 344           the PCI device bus master bit. Enable it automatically
 345           for the guest. This is a PCI spec violation but so is
 346           initiating DMA with bus master bit clear. */
 347        if (val == (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER)) {
 348            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
 349                                     proxy->pci_dev.config[PCI_COMMAND] |
 350                                     PCI_COMMAND_MASTER, 1);
 351        }
 352        break;
 353    case VIRTIO_MSI_CONFIG_VECTOR:
 354        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
 355        /* Make it possible for guest to discover an error took place. */
 356        if (msix_vector_use(&proxy->pci_dev, val) < 0)
 357            val = VIRTIO_NO_VECTOR;
 358        vdev->config_vector = val;
 359        break;
 360    case VIRTIO_MSI_QUEUE_VECTOR:
 361        msix_vector_unuse(&proxy->pci_dev,
 362                          virtio_queue_vector(vdev, vdev->queue_sel));
 363        /* Make it possible for guest to discover an error took place. */
 364        if (msix_vector_use(&proxy->pci_dev, val) < 0)
 365            val = VIRTIO_NO_VECTOR;
 366        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
 367        break;
 368    default:
 369        qemu_log_mask(LOG_GUEST_ERROR,
 370                      "%s: unexpected address 0x%x value 0x%x\n",
 371                      __func__, addr, val);
 372        break;
 373    }
 374}
 375
 376static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 377{
 378    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 379    uint32_t ret = 0xFFFFFFFF;
 380
 381    switch (addr) {
 382    case VIRTIO_PCI_HOST_FEATURES:
 383        ret = vdev->host_features;
 384        break;
 385    case VIRTIO_PCI_GUEST_FEATURES:
 386        ret = vdev->guest_features;
 387        break;
 388    case VIRTIO_PCI_QUEUE_PFN:
 389        ret = virtio_queue_get_addr(vdev, vdev->queue_sel)
 390              >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
 391        break;
 392    case VIRTIO_PCI_QUEUE_NUM:
 393        ret = virtio_queue_get_num(vdev, vdev->queue_sel);
 394        break;
 395    case VIRTIO_PCI_QUEUE_SEL:
 396        ret = vdev->queue_sel;
 397        break;
 398    case VIRTIO_PCI_STATUS:
 399        ret = vdev->status;
 400        break;
 401    case VIRTIO_PCI_ISR:
 402        /* reading from the ISR also clears it. */
 403        ret = qatomic_xchg(&vdev->isr, 0);
 404        pci_irq_deassert(&proxy->pci_dev);
 405        break;
 406    case VIRTIO_MSI_CONFIG_VECTOR:
 407        ret = vdev->config_vector;
 408        break;
 409    case VIRTIO_MSI_QUEUE_VECTOR:
 410        ret = virtio_queue_vector(vdev, vdev->queue_sel);
 411        break;
 412    default:
 413        break;
 414    }
 415
 416    return ret;
 417}
 418
 419static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
 420                                       unsigned size)
 421{
 422    VirtIOPCIProxy *proxy = opaque;
 423    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 424    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
 425    uint64_t val = 0;
 426    if (addr < config) {
 427        return virtio_ioport_read(proxy, addr);
 428    }
 429    addr -= config;
 430
 431    switch (size) {
 432    case 1:
 433        val = virtio_config_readb(vdev, addr);
 434        break;
 435    case 2:
 436        val = virtio_config_readw(vdev, addr);
 437        if (virtio_is_big_endian(vdev)) {
 438            val = bswap16(val);
 439        }
 440        break;
 441    case 4:
 442        val = virtio_config_readl(vdev, addr);
 443        if (virtio_is_big_endian(vdev)) {
 444            val = bswap32(val);
 445        }
 446        break;
 447    }
 448    return val;
 449}
 450
 451static void virtio_pci_config_write(void *opaque, hwaddr addr,
 452                                    uint64_t val, unsigned size)
 453{
 454    VirtIOPCIProxy *proxy = opaque;
 455    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
 456    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 457    if (addr < config) {
 458        virtio_ioport_write(proxy, addr, val);
 459        return;
 460    }
 461    addr -= config;
 462    /*
 463     * Virtio-PCI is odd. Ioports are LE but config space is target native
 464     * endian.
 465     */
 466    switch (size) {
 467    case 1:
 468        virtio_config_writeb(vdev, addr, val);
 469        break;
 470    case 2:
 471        if (virtio_is_big_endian(vdev)) {
 472            val = bswap16(val);
 473        }
 474        virtio_config_writew(vdev, addr, val);
 475        break;
 476    case 4:
 477        if (virtio_is_big_endian(vdev)) {
 478            val = bswap32(val);
 479        }
 480        virtio_config_writel(vdev, addr, val);
 481        break;
 482    }
 483}
 484
/*
 * Access callbacks for the legacy I/O BAR.  The handlers implement 1- to
 * 4-byte accesses and the region is declared little-endian.
 */
static const MemoryRegionOps virtio_pci_config_ops = {
    .read = virtio_pci_config_read,
    .write = virtio_pci_config_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 4,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
 494
 495static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy,
 496                                                 hwaddr *off, int len)
 497{
 498    int i;
 499    VirtIOPCIRegion *reg;
 500
 501    for (i = 0; i < ARRAY_SIZE(proxy->regs); ++i) {
 502        reg = &proxy->regs[i];
 503        if (*off >= reg->offset &&
 504            *off + len <= reg->offset + reg->size) {
 505            *off -= reg->offset;
 506            return &reg->mr;
 507        }
 508    }
 509
 510    return NULL;
 511}
 512
 513/* Below are generic functions to do memcpy from/to an address space,
 514 * without byteswaps, with input validation.
 515 *
 516 * As regular address_space_* APIs all do some kind of byteswap at least for
 517 * some host/target combinations, we are forced to explicitly convert to a
 518 * known-endianness integer value.
 519 * It doesn't really matter which endian format to go through, so the code
 520 * below selects the endian that causes the least amount of work on the given
 521 * host.
 522 *
 523 * Note: host pointer must be aligned.
 524 */
 525static
 526void virtio_address_space_write(VirtIOPCIProxy *proxy, hwaddr addr,
 527                                const uint8_t *buf, int len)
 528{
 529    uint64_t val;
 530    MemoryRegion *mr;
 531
 532    /* address_space_* APIs assume an aligned address.
 533     * As address is under guest control, handle illegal values.
 534     */
 535    addr &= ~(len - 1);
 536
 537    mr = virtio_address_space_lookup(proxy, &addr, len);
 538    if (!mr) {
 539        return;
 540    }
 541
 542    /* Make sure caller aligned buf properly */
 543    assert(!(((uintptr_t)buf) & (len - 1)));
 544
 545    switch (len) {
 546    case 1:
 547        val = pci_get_byte(buf);
 548        break;
 549    case 2:
 550        val = pci_get_word(buf);
 551        break;
 552    case 4:
 553        val = pci_get_long(buf);
 554        break;
 555    default:
 556        /* As length is under guest control, handle illegal values. */
 557        return;
 558    }
 559    memory_region_dispatch_write(mr, addr, val, size_memop(len) | MO_LE,
 560                                 MEMTXATTRS_UNSPECIFIED);
 561}
 562
 563static void
 564virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr,
 565                          uint8_t *buf, int len)
 566{
 567    uint64_t val;
 568    MemoryRegion *mr;
 569
 570    /* address_space_* APIs assume an aligned address.
 571     * As address is under guest control, handle illegal values.
 572     */
 573    addr &= ~(len - 1);
 574
 575    mr = virtio_address_space_lookup(proxy, &addr, len);
 576    if (!mr) {
 577        return;
 578    }
 579
 580    /* Make sure caller aligned buf properly */
 581    assert(!(((uintptr_t)buf) & (len - 1)));
 582
 583    memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE,
 584                                MEMTXATTRS_UNSPECIFIED);
 585    switch (len) {
 586    case 1:
 587        pci_set_byte(buf, val);
 588        break;
 589    case 2:
 590        pci_set_word(buf, val);
 591        break;
 592    case 4:
 593        pci_set_long(buf, val);
 594        break;
 595    default:
 596        /* As length is under guest control, handle illegal values. */
 597        break;
 598    }
 599}
 600
 601static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 602                                uint32_t val, int len)
 603{
 604    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
 605    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 606    struct virtio_pci_cfg_cap *cfg;
 607
 608    pci_default_write_config(pci_dev, address, val, len);
 609
 610    if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
 611        pcie_cap_flr_write_config(pci_dev, address, val, len);
 612    }
 613
 614    if (range_covers_byte(address, len, PCI_COMMAND)) {
 615        if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
 616            virtio_set_disabled(vdev, true);
 617            virtio_pci_stop_ioeventfd(proxy);
 618            virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
 619        } else {
 620            virtio_set_disabled(vdev, false);
 621        }
 622    }
 623
 624    if (proxy->config_cap &&
 625        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
 626                                                                  pci_cfg_data),
 627                       sizeof cfg->pci_cfg_data)) {
 628        uint32_t off;
 629        uint32_t len;
 630
 631        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
 632        off = le32_to_cpu(cfg->cap.offset);
 633        len = le32_to_cpu(cfg->cap.length);
 634
 635        if (len == 1 || len == 2 || len == 4) {
 636            assert(len <= sizeof cfg->pci_cfg_data);
 637            virtio_address_space_write(proxy, off, cfg->pci_cfg_data, len);
 638        }
 639    }
 640}
 641
 642static uint32_t virtio_read_config(PCIDevice *pci_dev,
 643                                   uint32_t address, int len)
 644{
 645    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
 646    struct virtio_pci_cfg_cap *cfg;
 647
 648    if (proxy->config_cap &&
 649        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
 650                                                                  pci_cfg_data),
 651                       sizeof cfg->pci_cfg_data)) {
 652        uint32_t off;
 653        uint32_t len;
 654
 655        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
 656        off = le32_to_cpu(cfg->cap.offset);
 657        len = le32_to_cpu(cfg->cap.length);
 658
 659        if (len == 1 || len == 2 || len == 4) {
 660            assert(len <= sizeof cfg->pci_cfg_data);
 661            virtio_address_space_read(proxy, off, cfg->pci_cfg_data, len);
 662        }
 663    }
 664
 665    return pci_default_read_config(pci_dev, address, len);
 666}
 667
 668static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
 669                                        unsigned int queue_no,
 670                                        unsigned int vector)
 671{
 672    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 673    int ret;
 674
 675    if (irqfd->users == 0) {
 676        ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev);
 677        if (ret < 0) {
 678            return ret;
 679        }
 680        irqfd->virq = ret;
 681    }
 682    irqfd->users++;
 683    return 0;
 684}
 685
 686static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
 687                                             unsigned int vector)
 688{
 689    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 690    if (--irqfd->users == 0) {
 691        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
 692    }
 693}
 694
 695static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
 696                                 unsigned int queue_no,
 697                                 unsigned int vector)
 698{
 699    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 700    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 701    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
 702    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
 703    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
 704}
 705
 706static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
 707                                      unsigned int queue_no,
 708                                      unsigned int vector)
 709{
 710    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 711    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
 712    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
 713    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 714    int ret;
 715
 716    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
 717    assert(ret == 0);
 718}
 719
 720static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
 721{
 722    PCIDevice *dev = &proxy->pci_dev;
 723    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 724    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 725    unsigned int vector;
 726    int ret, queue_no;
 727
 728    for (queue_no = 0; queue_no < nvqs; queue_no++) {
 729        if (!virtio_queue_get_num(vdev, queue_no)) {
 730            break;
 731        }
 732        vector = virtio_queue_vector(vdev, queue_no);
 733        if (vector >= msix_nr_vectors_allocated(dev)) {
 734            continue;
 735        }
 736        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
 737        if (ret < 0) {
 738            goto undo;
 739        }
 740        /* If guest supports masking, set up irqfd now.
 741         * Otherwise, delay until unmasked in the frontend.
 742         */
 743        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 744            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
 745            if (ret < 0) {
 746                kvm_virtio_pci_vq_vector_release(proxy, vector);
 747                goto undo;
 748            }
 749        }
 750    }
 751    return 0;
 752
 753undo:
 754    while (--queue_no >= 0) {
 755        vector = virtio_queue_vector(vdev, queue_no);
 756        if (vector >= msix_nr_vectors_allocated(dev)) {
 757            continue;
 758        }
 759        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 760            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
 761        }
 762        kvm_virtio_pci_vq_vector_release(proxy, vector);
 763    }
 764    return ret;
 765}
 766
 767static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
 768{
 769    PCIDevice *dev = &proxy->pci_dev;
 770    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 771    unsigned int vector;
 772    int queue_no;
 773    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 774
 775    for (queue_no = 0; queue_no < nvqs; queue_no++) {
 776        if (!virtio_queue_get_num(vdev, queue_no)) {
 777            break;
 778        }
 779        vector = virtio_queue_vector(vdev, queue_no);
 780        if (vector >= msix_nr_vectors_allocated(dev)) {
 781            continue;
 782        }
 783        /* If guest supports masking, clean up irqfd now.
 784         * Otherwise, it was cleaned when masked in the frontend.
 785         */
 786        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 787            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
 788        }
 789        kvm_virtio_pci_vq_vector_release(proxy, vector);
 790    }
 791}
 792
 793static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
 794                                       unsigned int queue_no,
 795                                       unsigned int vector,
 796                                       MSIMessage msg)
 797{
 798    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 799    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 800    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
 801    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
 802    VirtIOIRQFD *irqfd;
 803    int ret = 0;
 804
 805    if (proxy->vector_irqfd) {
 806        irqfd = &proxy->vector_irqfd[vector];
 807        if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
 808            ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg,
 809                                               &proxy->pci_dev);
 810            if (ret < 0) {
 811                return ret;
 812            }
 813            kvm_irqchip_commit_routes(kvm_state);
 814        }
 815    }
 816
 817    /* If guest supports masking, irqfd is already setup, unmask it.
 818     * Otherwise, set it up now.
 819     */
 820    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 821        k->guest_notifier_mask(vdev, queue_no, false);
 822        /* Test after unmasking to avoid losing events. */
 823        if (k->guest_notifier_pending &&
 824            k->guest_notifier_pending(vdev, queue_no)) {
 825            event_notifier_set(n);
 826        }
 827    } else {
 828        ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
 829    }
 830    return ret;
 831}
 832
 833static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
 834                                             unsigned int queue_no,
 835                                             unsigned int vector)
 836{
 837    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 838    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 839
 840    /* If guest supports masking, keep irqfd but mask it.
 841     * Otherwise, clean it up now.
 842     */ 
 843    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
 844        k->guest_notifier_mask(vdev, queue_no, true);
 845    } else {
 846        kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
 847    }
 848}
 849
 850static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
 851                                    MSIMessage msg)
 852{
 853    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
 854    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 855    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
 856    int ret, index, unmasked = 0;
 857
 858    while (vq) {
 859        index = virtio_get_queue_index(vq);
 860        if (!virtio_queue_get_num(vdev, index)) {
 861            break;
 862        }
 863        if (index < proxy->nvqs_with_notifiers) {
 864            ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg);
 865            if (ret < 0) {
 866                goto undo;
 867            }
 868            ++unmasked;
 869        }
 870        vq = virtio_vector_next_queue(vq);
 871    }
 872
 873    return 0;
 874
 875undo:
 876    vq = virtio_vector_first_queue(vdev, vector);
 877    while (vq && unmasked >= 0) {
 878        index = virtio_get_queue_index(vq);
 879        if (index < proxy->nvqs_with_notifiers) {
 880            virtio_pci_vq_vector_mask(proxy, index, vector);
 881            --unmasked;
 882        }
 883        vq = virtio_vector_next_queue(vq);
 884    }
 885    return ret;
 886}
 887
 888static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
 889{
 890    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
 891    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 892    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
 893    int index;
 894
 895    while (vq) {
 896        index = virtio_get_queue_index(vq);
 897        if (!virtio_queue_get_num(vdev, index)) {
 898            break;
 899        }
 900        if (index < proxy->nvqs_with_notifiers) {
 901            virtio_pci_vq_vector_mask(proxy, index, vector);
 902        }
 903        vq = virtio_vector_next_queue(vq);
 904    }
 905}
 906
 907static void virtio_pci_vector_poll(PCIDevice *dev,
 908                                   unsigned int vector_start,
 909                                   unsigned int vector_end)
 910{
 911    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
 912    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 913    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 914    int queue_no;
 915    unsigned int vector;
 916    EventNotifier *notifier;
 917    VirtQueue *vq;
 918
 919    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
 920        if (!virtio_queue_get_num(vdev, queue_no)) {
 921            break;
 922        }
 923        vector = virtio_queue_vector(vdev, queue_no);
 924        if (vector < vector_start || vector >= vector_end ||
 925            !msix_is_masked(dev, vector)) {
 926            continue;
 927        }
 928        vq = virtio_get_queue(vdev, queue_no);
 929        notifier = virtio_queue_get_guest_notifier(vq);
 930        if (k->guest_notifier_pending) {
 931            if (k->guest_notifier_pending(vdev, queue_no)) {
 932                msix_set_pending(dev, vector);
 933            }
 934        } else if (event_notifier_test_and_clear(notifier)) {
 935            msix_set_pending(dev, vector);
 936        }
 937    }
 938}
 939
 940static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
 941                                         bool with_irqfd)
 942{
 943    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 944    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 945    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
 946    VirtQueue *vq = virtio_get_queue(vdev, n);
 947    EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
 948
 949    if (assign) {
 950        int r = event_notifier_init(notifier, 0);
 951        if (r < 0) {
 952            return r;
 953        }
 954        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
 955    } else {
 956        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
 957        event_notifier_cleanup(notifier);
 958    }
 959
 960    if (!msix_enabled(&proxy->pci_dev) &&
 961        vdev->use_guest_notifier_mask &&
 962        vdc->guest_notifier_mask) {
 963        vdc->guest_notifier_mask(vdev, n, !assign);
 964    }
 965
 966    return 0;
 967}
 968
 969static bool virtio_pci_query_guest_notifiers(DeviceState *d)
 970{
 971    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 972    return msix_enabled(&proxy->pci_dev);
 973}
 974
 975static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
 976{
 977    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 978    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 979    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 980    int r, n;
 981    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
 982        kvm_msi_via_irqfd_enabled();
 983
 984    nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);
 985
 986    /* When deassigning, pass a consistent nvqs value
 987     * to avoid leaking notifiers.
 988     */
 989    assert(assign || nvqs == proxy->nvqs_with_notifiers);
 990
 991    proxy->nvqs_with_notifiers = nvqs;
 992
 993    /* Must unset vector notifier while guest notifier is still assigned */
 994    if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) {
 995        msix_unset_vector_notifiers(&proxy->pci_dev);
 996        if (proxy->vector_irqfd) {
 997            kvm_virtio_pci_vector_release(proxy, nvqs);
 998            g_free(proxy->vector_irqfd);
 999            proxy->vector_irqfd = NULL;
1000        }
1001    }
1002
1003    for (n = 0; n < nvqs; n++) {
1004        if (!virtio_queue_get_num(vdev, n)) {
1005            break;
1006        }
1007
1008        r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd);
1009        if (r < 0) {
1010            goto assign_error;
1011        }
1012    }
1013
1014    /* Must set vector notifier after guest notifier has been assigned */
1015    if ((with_irqfd || k->guest_notifier_mask) && assign) {
1016        if (with_irqfd) {
1017            proxy->vector_irqfd =
1018                g_malloc0(sizeof(*proxy->vector_irqfd) *
1019                          msix_nr_vectors_allocated(&proxy->pci_dev));
1020            r = kvm_virtio_pci_vector_use(proxy, nvqs);
1021            if (r < 0) {
1022                goto assign_error;
1023            }
1024        }
1025        r = msix_set_vector_notifiers(&proxy->pci_dev,
1026                                      virtio_pci_vector_unmask,
1027                                      virtio_pci_vector_mask,
1028                                      virtio_pci_vector_poll);
1029        if (r < 0) {
1030            goto notifiers_error;
1031        }
1032    }
1033
1034    return 0;
1035
1036notifiers_error:
1037    if (with_irqfd) {
1038        assert(assign);
1039        kvm_virtio_pci_vector_release(proxy, nvqs);
1040    }
1041
1042assign_error:
1043    /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
1044    assert(assign);
1045    while (--n >= 0) {
1046        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
1047    }
1048    return r;
1049}
1050
1051static int virtio_pci_set_host_notifier_mr(DeviceState *d, int n,
1052                                           MemoryRegion *mr, bool assign)
1053{
1054    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
1055    int offset;
1056
1057    if (n >= VIRTIO_QUEUE_MAX || !virtio_pci_modern(proxy) ||
1058        virtio_pci_queue_mem_mult(proxy) != memory_region_size(mr)) {
1059        return -1;
1060    }
1061
1062    if (assign) {
1063        offset = virtio_pci_queue_mem_mult(proxy) * n;
1064        memory_region_add_subregion_overlap(&proxy->notify.mr, offset, mr, 1);
1065    } else {
1066        memory_region_del_subregion(&proxy->notify.mr, mr);
1067    }
1068
1069    return 0;
1070}
1071
1072static void virtio_pci_vmstate_change(DeviceState *d, bool running)
1073{
1074    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
1075    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1076
1077    if (running) {
1078        /* Old QEMU versions did not set bus master enable on status write.
1079         * Detect DRIVER set and enable it.
1080         */
1081        if ((proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION) &&
1082            (vdev->status & VIRTIO_CONFIG_S_DRIVER) &&
1083            !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1084            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
1085                                     proxy->pci_dev.config[PCI_COMMAND] |
1086                                     PCI_COMMAND_MASTER, 1);
1087        }
1088        virtio_pci_start_ioeventfd(proxy);
1089    } else {
1090        virtio_pci_stop_ioeventfd(proxy);
1091    }
1092}
1093
1094/*
1095 * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
1096 */
1097
1098static int virtio_pci_query_nvectors(DeviceState *d)
1099{
1100    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1101
1102    return proxy->nvectors;
1103}
1104
1105static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
1106{
1107    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1108    PCIDevice *dev = &proxy->pci_dev;
1109
1110    return pci_get_address_space(dev);
1111}
1112
1113static bool virtio_pci_queue_enabled(DeviceState *d, int n)
1114{
1115    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1116    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1117
1118    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1119        return proxy->vqs[n].enabled;
1120    }
1121
1122    return virtio_queue_enabled_legacy(vdev, n);
1123}
1124
1125static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
1126                                   struct virtio_pci_cap *cap)
1127{
1128    PCIDevice *dev = &proxy->pci_dev;
1129    int offset;
1130
1131    offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
1132                                cap->cap_len, &error_abort);
1133
1134    assert(cap->cap_len >= sizeof *cap);
1135    memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
1136           cap->cap_len - PCI_CAP_FLAGS);
1137
1138    return offset;
1139}
1140
1141static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
1142                                       unsigned size)
1143{
1144    VirtIOPCIProxy *proxy = opaque;
1145    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1146    uint32_t val = 0;
1147    int i;
1148
1149    switch (addr) {
1150    case VIRTIO_PCI_COMMON_DFSELECT:
1151        val = proxy->dfselect;
1152        break;
1153    case VIRTIO_PCI_COMMON_DF:
1154        if (proxy->dfselect <= 1) {
1155            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1156
1157            val = (vdev->host_features & ~vdc->legacy_features) >>
1158                (32 * proxy->dfselect);
1159        }
1160        break;
1161    case VIRTIO_PCI_COMMON_GFSELECT:
1162        val = proxy->gfselect;
1163        break;
1164    case VIRTIO_PCI_COMMON_GF:
1165        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
1166            val = proxy->guest_features[proxy->gfselect];
1167        }
1168        break;
1169    case VIRTIO_PCI_COMMON_MSIX:
1170        val = vdev->config_vector;
1171        break;
1172    case VIRTIO_PCI_COMMON_NUMQ:
1173        for (i = 0; i < VIRTIO_QUEUE_MAX; ++i) {
1174            if (virtio_queue_get_num(vdev, i)) {
1175                val = i + 1;
1176            }
1177        }
1178        break;
1179    case VIRTIO_PCI_COMMON_STATUS:
1180        val = vdev->status;
1181        break;
1182    case VIRTIO_PCI_COMMON_CFGGENERATION:
1183        val = vdev->generation;
1184        break;
1185    case VIRTIO_PCI_COMMON_Q_SELECT:
1186        val = vdev->queue_sel;
1187        break;
1188    case VIRTIO_PCI_COMMON_Q_SIZE:
1189        val = virtio_queue_get_num(vdev, vdev->queue_sel);
1190        break;
1191    case VIRTIO_PCI_COMMON_Q_MSIX:
1192        val = virtio_queue_vector(vdev, vdev->queue_sel);
1193        break;
1194    case VIRTIO_PCI_COMMON_Q_ENABLE:
1195        val = proxy->vqs[vdev->queue_sel].enabled;
1196        break;
1197    case VIRTIO_PCI_COMMON_Q_NOFF:
1198        /* Simply map queues in order */
1199        val = vdev->queue_sel;
1200        break;
1201    case VIRTIO_PCI_COMMON_Q_DESCLO:
1202        val = proxy->vqs[vdev->queue_sel].desc[0];
1203        break;
1204    case VIRTIO_PCI_COMMON_Q_DESCHI:
1205        val = proxy->vqs[vdev->queue_sel].desc[1];
1206        break;
1207    case VIRTIO_PCI_COMMON_Q_AVAILLO:
1208        val = proxy->vqs[vdev->queue_sel].avail[0];
1209        break;
1210    case VIRTIO_PCI_COMMON_Q_AVAILHI:
1211        val = proxy->vqs[vdev->queue_sel].avail[1];
1212        break;
1213    case VIRTIO_PCI_COMMON_Q_USEDLO:
1214        val = proxy->vqs[vdev->queue_sel].used[0];
1215        break;
1216    case VIRTIO_PCI_COMMON_Q_USEDHI:
1217        val = proxy->vqs[vdev->queue_sel].used[1];
1218        break;
1219    default:
1220        val = 0;
1221    }
1222
1223    return val;
1224}
1225
1226static void virtio_pci_common_write(void *opaque, hwaddr addr,
1227                                    uint64_t val, unsigned size)
1228{
1229    VirtIOPCIProxy *proxy = opaque;
1230    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1231
1232    switch (addr) {
1233    case VIRTIO_PCI_COMMON_DFSELECT:
1234        proxy->dfselect = val;
1235        break;
1236    case VIRTIO_PCI_COMMON_GFSELECT:
1237        proxy->gfselect = val;
1238        break;
1239    case VIRTIO_PCI_COMMON_GF:
1240        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
1241            proxy->guest_features[proxy->gfselect] = val;
1242            virtio_set_features(vdev,
1243                                (((uint64_t)proxy->guest_features[1]) << 32) |
1244                                proxy->guest_features[0]);
1245        }
1246        break;
1247    case VIRTIO_PCI_COMMON_MSIX:
1248        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
1249        /* Make it possible for guest to discover an error took place. */
1250        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
1251            val = VIRTIO_NO_VECTOR;
1252        }
1253        vdev->config_vector = val;
1254        break;
1255    case VIRTIO_PCI_COMMON_STATUS:
1256        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1257            virtio_pci_stop_ioeventfd(proxy);
1258        }
1259
1260        virtio_set_status(vdev, val & 0xFF);
1261
1262        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
1263            virtio_pci_start_ioeventfd(proxy);
1264        }
1265
1266        if (vdev->status == 0) {
1267            virtio_pci_reset(DEVICE(proxy));
1268        }
1269
1270        break;
1271    case VIRTIO_PCI_COMMON_Q_SELECT:
1272        if (val < VIRTIO_QUEUE_MAX) {
1273            vdev->queue_sel = val;
1274        }
1275        break;
1276    case VIRTIO_PCI_COMMON_Q_SIZE:
1277        proxy->vqs[vdev->queue_sel].num = val;
1278        virtio_queue_set_num(vdev, vdev->queue_sel,
1279                             proxy->vqs[vdev->queue_sel].num);
1280        break;
1281    case VIRTIO_PCI_COMMON_Q_MSIX:
1282        msix_vector_unuse(&proxy->pci_dev,
1283                          virtio_queue_vector(vdev, vdev->queue_sel));
1284        /* Make it possible for guest to discover an error took place. */
1285        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
1286            val = VIRTIO_NO_VECTOR;
1287        }
1288        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
1289        break;
1290    case VIRTIO_PCI_COMMON_Q_ENABLE:
1291        if (val == 1) {
1292            virtio_queue_set_num(vdev, vdev->queue_sel,
1293                                 proxy->vqs[vdev->queue_sel].num);
1294            virtio_queue_set_rings(vdev, vdev->queue_sel,
1295                       ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
1296                       proxy->vqs[vdev->queue_sel].desc[0],
1297                       ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
1298                       proxy->vqs[vdev->queue_sel].avail[0],
1299                       ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
1300                       proxy->vqs[vdev->queue_sel].used[0]);
1301            proxy->vqs[vdev->queue_sel].enabled = 1;
1302        } else {
1303            virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val);
1304        }
1305        break;
1306    case VIRTIO_PCI_COMMON_Q_DESCLO:
1307        proxy->vqs[vdev->queue_sel].desc[0] = val;
1308        break;
1309    case VIRTIO_PCI_COMMON_Q_DESCHI:
1310        proxy->vqs[vdev->queue_sel].desc[1] = val;
1311        break;
1312    case VIRTIO_PCI_COMMON_Q_AVAILLO:
1313        proxy->vqs[vdev->queue_sel].avail[0] = val;
1314        break;
1315    case VIRTIO_PCI_COMMON_Q_AVAILHI:
1316        proxy->vqs[vdev->queue_sel].avail[1] = val;
1317        break;
1318    case VIRTIO_PCI_COMMON_Q_USEDLO:
1319        proxy->vqs[vdev->queue_sel].used[0] = val;
1320        break;
1321    case VIRTIO_PCI_COMMON_Q_USEDHI:
1322        proxy->vqs[vdev->queue_sel].used[1] = val;
1323        break;
1324    default:
1325        break;
1326    }
1327}
1328
1329
1330static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr,
1331                                       unsigned size)
1332{
1333    return 0;
1334}
1335
1336static void virtio_pci_notify_write(void *opaque, hwaddr addr,
1337                                    uint64_t val, unsigned size)
1338{
1339    VirtIOPCIProxy *proxy = opaque;
1340    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1341
1342    unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);
1343
1344    if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
1345        virtio_queue_notify(vdev, queue);
1346    }
1347}
1348
1349static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
1350                                        uint64_t val, unsigned size)
1351{
1352    VirtIOPCIProxy *proxy = opaque;
1353    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1354
1355    unsigned queue = val;
1356
1357    if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
1358        virtio_queue_notify(vdev, queue);
1359    }
1360}
1361
1362static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
1363                                    unsigned size)
1364{
1365    VirtIOPCIProxy *proxy = opaque;
1366    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1367    uint64_t val;
1368
1369    if (vdev == NULL) {
1370        return 0;
1371    }
1372
1373    val = qatomic_xchg(&vdev->isr, 0);
1374    pci_irq_deassert(&proxy->pci_dev);
1375    return val;
1376}
1377
1378static void virtio_pci_isr_write(void *opaque, hwaddr addr,
1379                                 uint64_t val, unsigned size)
1380{
1381}
1382
1383static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr,
1384                                       unsigned size)
1385{
1386    VirtIOPCIProxy *proxy = opaque;
1387    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1388    uint64_t val;
1389
1390    if (vdev == NULL) {
1391        return 0;
1392    }
1393
1394    switch (size) {
1395    case 1:
1396        val = virtio_config_modern_readb(vdev, addr);
1397        break;
1398    case 2:
1399        val = virtio_config_modern_readw(vdev, addr);
1400        break;
1401    case 4:
1402        val = virtio_config_modern_readl(vdev, addr);
1403        break;
1404    default:
1405        val = 0;
1406        break;
1407    }
1408    return val;
1409}
1410
1411static void virtio_pci_device_write(void *opaque, hwaddr addr,
1412                                    uint64_t val, unsigned size)
1413{
1414    VirtIOPCIProxy *proxy = opaque;
1415    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1416
1417    if (vdev == NULL) {
1418        return;
1419    }
1420
1421    switch (size) {
1422    case 1:
1423        virtio_config_modern_writeb(vdev, addr, val);
1424        break;
1425    case 2:
1426        virtio_config_modern_writew(vdev, addr, val);
1427        break;
1428    case 4:
1429        virtio_config_modern_writel(vdev, addr, val);
1430        break;
1431    }
1432}
1433
1434static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
1435                                           const char *vdev_name)
1436{
1437    static const MemoryRegionOps common_ops = {
1438        .read = virtio_pci_common_read,
1439        .write = virtio_pci_common_write,
1440        .impl = {
1441            .min_access_size = 1,
1442            .max_access_size = 4,
1443        },
1444        .endianness = DEVICE_LITTLE_ENDIAN,
1445    };
1446    static const MemoryRegionOps isr_ops = {
1447        .read = virtio_pci_isr_read,
1448        .write = virtio_pci_isr_write,
1449        .impl = {
1450            .min_access_size = 1,
1451            .max_access_size = 4,
1452        },
1453        .endianness = DEVICE_LITTLE_ENDIAN,
1454    };
1455    static const MemoryRegionOps device_ops = {
1456        .read = virtio_pci_device_read,
1457        .write = virtio_pci_device_write,
1458        .impl = {
1459            .min_access_size = 1,
1460            .max_access_size = 4,
1461        },
1462        .endianness = DEVICE_LITTLE_ENDIAN,
1463    };
1464    static const MemoryRegionOps notify_ops = {
1465        .read = virtio_pci_notify_read,
1466        .write = virtio_pci_notify_write,
1467        .impl = {
1468            .min_access_size = 1,
1469            .max_access_size = 4,
1470        },
1471        .endianness = DEVICE_LITTLE_ENDIAN,
1472    };
1473    static const MemoryRegionOps notify_pio_ops = {
1474        .read = virtio_pci_notify_read,
1475        .write = virtio_pci_notify_write_pio,
1476        .impl = {
1477            .min_access_size = 1,
1478            .max_access_size = 4,
1479        },
1480        .endianness = DEVICE_LITTLE_ENDIAN,
1481    };
1482    g_autoptr(GString) name = g_string_new(NULL);
1483
1484    g_string_printf(name, "virtio-pci-common-%s", vdev_name);
1485    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
1486                          &common_ops,
1487                          proxy,
1488                          name->str,
1489                          proxy->common.size);
1490
1491    g_string_printf(name, "virtio-pci-isr-%s", vdev_name);
1492    memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
1493                          &isr_ops,
1494                          proxy,
1495                          name->str,
1496                          proxy->isr.size);
1497
1498    g_string_printf(name, "virtio-pci-device-%s", vdev_name);
1499    memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
1500                          &device_ops,
1501                          proxy,
1502                          name->str,
1503                          proxy->device.size);
1504
1505    g_string_printf(name, "virtio-pci-notify-%s", vdev_name);
1506    memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
1507                          &notify_ops,
1508                          proxy,
1509                          name->str,
1510                          proxy->notify.size);
1511
1512    g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name);
1513    memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
1514                          &notify_pio_ops,
1515                          proxy,
1516                          name->str,
1517                          proxy->notify_pio.size);
1518}
1519
1520static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy,
1521                                         VirtIOPCIRegion *region,
1522                                         struct virtio_pci_cap *cap,
1523                                         MemoryRegion *mr,
1524                                         uint8_t bar)
1525{
1526    memory_region_add_subregion(mr, region->offset, &region->mr);
1527
1528    cap->cfg_type = region->type;
1529    cap->bar = bar;
1530    cap->offset = cpu_to_le32(region->offset);
1531    cap->length = cpu_to_le32(region->size);
1532    virtio_pci_add_mem_cap(proxy, cap);
1533
1534}
1535
1536static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy,
1537                                             VirtIOPCIRegion *region,
1538                                             struct virtio_pci_cap *cap)
1539{
1540    virtio_pci_modern_region_map(proxy, region, cap,
1541                                 &proxy->modern_bar, proxy->modern_mem_bar_idx);
1542}
1543
1544static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy,
1545                                            VirtIOPCIRegion *region,
1546                                            struct virtio_pci_cap *cap)
1547{
1548    virtio_pci_modern_region_map(proxy, region, cap,
1549                                 &proxy->io_bar, proxy->modern_io_bar_idx);
1550}
1551
1552static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy,
1553                                               VirtIOPCIRegion *region)
1554{
1555    memory_region_del_subregion(&proxy->modern_bar,
1556                                &region->mr);
1557}
1558
1559static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy,
1560                                              VirtIOPCIRegion *region)
1561{
1562    memory_region_del_subregion(&proxy->io_bar,
1563                                &region->mr);
1564}
1565
1566static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
1567{
1568    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1569    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1570
1571    if (virtio_pci_modern(proxy)) {
1572        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
1573    }
1574
1575    virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE);
1576}
1577
1578/* This is called by virtio-bus just after the device is plugged. */
1579static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
1580{
1581    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1582    VirtioBusState *bus = &proxy->bus;
1583    bool legacy = virtio_pci_legacy(proxy);
1584    bool modern;
1585    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
1586    uint8_t *config;
1587    uint32_t size;
1588    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1589
1590    /*
1591     * Virtio capabilities present without
1592     * VIRTIO_F_VERSION_1 confuses guests
1593     */
1594    if (!proxy->ignore_backend_features &&
1595            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
1596        virtio_pci_disable_modern(proxy);
1597
1598        if (!legacy) {
1599            error_setg(errp, "Device doesn't support modern mode, and legacy"
1600                             " mode is disabled");
1601            error_append_hint(errp, "Set disable-legacy to off\n");
1602
1603            return;
1604        }
1605    }
1606
1607    modern = virtio_pci_modern(proxy);
1608
1609    config = proxy->pci_dev.config;
1610    if (proxy->class_code) {
1611        pci_config_set_class(config, proxy->class_code);
1612    }
1613
1614    if (legacy) {
1615        if (!virtio_legacy_allowed(vdev)) {
1616            /*
1617             * To avoid migration issues, we allow legacy mode when legacy
1618             * check is disabled in the old machine types (< 5.1).
1619             */
1620            if (virtio_legacy_check_disabled(vdev)) {
1621                warn_report("device is modern-only, but for backward "
1622                            "compatibility legacy is allowed");
1623            } else {
1624                error_setg(errp,
1625                           "device is modern-only, use disable-legacy=on");
1626                return;
1627            }
1628        }
1629        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1630            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
1631                       " neither legacy nor transitional device");
1632            return ;
1633        }
1634        /*
1635         * Legacy and transitional devices use specific subsystem IDs.
1636         * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID)
1637         * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default.
1638         */
1639        pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
1640    } else {
1641        /* pure virtio-1.0 */
1642        pci_set_word(config + PCI_VENDOR_ID,
1643                     PCI_VENDOR_ID_REDHAT_QUMRANET);
1644        pci_set_word(config + PCI_DEVICE_ID,
1645                     0x1040 + virtio_bus_get_vdev_id(bus));
1646        pci_config_set_revision(config, 1);
1647    }
1648    config[PCI_INTERRUPT_PIN] = 1;
1649
1650
1651    if (modern) {
1652        struct virtio_pci_cap cap = {
1653            .cap_len = sizeof cap,
1654        };
1655        struct virtio_pci_notify_cap notify = {
1656            .cap.cap_len = sizeof notify,
1657            .notify_off_multiplier =
1658                cpu_to_le32(virtio_pci_queue_mem_mult(proxy)),
1659        };
1660        struct virtio_pci_cfg_cap cfg = {
1661            .cap.cap_len = sizeof cfg,
1662            .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG,
1663        };
1664        struct virtio_pci_notify_cap notify_pio = {
1665            .cap.cap_len = sizeof notify,
1666            .notify_off_multiplier = cpu_to_le32(0x0),
1667        };
1668
1669        struct virtio_pci_cfg_cap *cfg_mask;
1670
1671        virtio_pci_modern_regions_init(proxy, vdev->name);
1672
1673        virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
1674        virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
1675        virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
1676        virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);
1677
1678        if (modern_pio) {
1679            memory_region_init(&proxy->io_bar, OBJECT(proxy),
1680                               "virtio-pci-io", 0x4);
1681
1682            pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
1683                             PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
1684
1685            virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
1686                                            &notify_pio.cap);
1687        }
1688
1689        pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
1690                         PCI_BASE_ADDRESS_SPACE_MEMORY |
1691                         PCI_BASE_ADDRESS_MEM_PREFETCH |
1692                         PCI_BASE_ADDRESS_MEM_TYPE_64,
1693                         &proxy->modern_bar);
1694
1695        proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
1696        cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
1697        pci_set_byte(&cfg_mask->cap.bar, ~0x0);
1698        pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0);
1699        pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0);
1700        pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
1701    }
1702
1703    if (proxy->nvectors) {
1704        int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
1705                                          proxy->msix_bar_idx, NULL);
1706        if (err) {
1707            /* Notice when a system that supports MSIx can't initialize it */
1708            if (err != -ENOTSUP) {
1709                warn_report("unable to init msix vectors to %" PRIu32,
1710                            proxy->nvectors);
1711            }
1712            proxy->nvectors = 0;
1713        }
1714    }
1715
1716    proxy->pci_dev.config_write = virtio_write_config;
1717    proxy->pci_dev.config_read = virtio_read_config;
1718
1719    if (legacy) {
1720        size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
1721            + virtio_bus_get_vdev_config_len(bus);
1722        size = pow2ceil(size);
1723
1724        memory_region_init_io(&proxy->bar, OBJECT(proxy),
1725                              &virtio_pci_config_ops,
1726                              proxy, "virtio-pci", size);
1727
1728        pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
1729                         PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
1730    }
1731}
1732
1733static void virtio_pci_device_unplugged(DeviceState *d)
1734{
1735    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1736    bool modern = virtio_pci_modern(proxy);
1737    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
1738
1739    virtio_pci_stop_ioeventfd(proxy);
1740
1741    if (modern) {
1742        virtio_pci_modern_mem_region_unmap(proxy, &proxy->common);
1743        virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr);
1744        virtio_pci_modern_mem_region_unmap(proxy, &proxy->device);
1745        virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify);
1746        if (modern_pio) {
1747            virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
1748        }
1749    }
1750}
1751
/*
 * PCIDeviceClass::realize implementation for the virtio-pci proxy.
 *
 * Sets up the default BAR indices and the layout of the modern capability
 * regions, resolves disable-legacy=auto, initializes the PCI Express
 * capabilities when the device sits behind a non-root express bus, creates
 * the embedded virtio bus and finally calls the subclass realize hook.
 *
 * @pci_dev: the PCI device being realized (a VirtIOPCIProxy)
 * @errp: set when neither modern nor legacy mode is enabled or when adding
 *        the power management capability fails; also handed to the subclass
 *        realize hook
 */
static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev);
    /* True when the parent bus is PCI Express and is not the root bus. */
    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
                     !pci_bus_is_root(pci_get_bus(pci_dev));

    /* Drop ioeventfd use when KVM is enabled but lacks many ioeventfds. */
    if (kvm_enabled() && !kvm_has_many_ioeventfds()) {
        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
    }

    /*
     * virtio pci bar layout used by default.
     * subclasses can re-arrange things if needed.
     *
     *   region 0   --  virtio legacy io bar
     *   region 1   --  msi-x bar
     *   region 2   --  virtio modern io bar (off by default)
     *   region 4+5 --  virtio modern memory (64bit) bar
     *
     */
    proxy->legacy_io_bar_idx  = 0;
    proxy->msix_bar_idx       = 1;
    proxy->modern_io_bar_idx  = 2;
    proxy->modern_mem_bar_idx = 4;

    /*
     * Default placement of the modern regions inside the modern memory bar:
     * common, isr and device config take 0x1000 bytes each, followed by the
     * notify region at 0x3000.
     */
    proxy->common.offset = 0x0;
    proxy->common.size = 0x1000;
    proxy->common.type = VIRTIO_PCI_CAP_COMMON_CFG;

    proxy->isr.offset = 0x1000;
    proxy->isr.size = 0x1000;
    proxy->isr.type = VIRTIO_PCI_CAP_ISR_CFG;

    proxy->device.offset = 0x2000;
    proxy->device.size = 0x1000;
    proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG;

    /* The notify region is sized for VIRTIO_QUEUE_MAX queues. */
    proxy->notify.offset = 0x3000;
    proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX;
    proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    /* PIO variant of the notify region: 4 bytes at offset 0. */
    proxy->notify_pio.offset = 0x0;
    proxy->notify_pio.size = 0x4;
    proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    /* subclasses can enforce modern, so do this unconditionally */
    memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci",
                       /* PCI BAR regions must be powers of 2 */
                       pow2ceil(proxy->notify.offset + proxy->notify.size));

    /* Resolve "auto": legacy is disabled only behind a PCIe port. */
    if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
        proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    /*
     * NOTE(review): modern_bar has already been initialized when this (and
     * the later) early return is taken; whether any cleanup is expected on
     * these paths is not visible from this function -- confirm.
     */
    if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) {
        error_setg(errp, "device cannot work as neither modern nor legacy mode"
                   " is enabled");
        error_append_hint(errp, "Set either disable-modern or disable-legacy"
                          " to off\n");
        return;
    }

    if (pcie_port && pci_is_express(pci_dev)) {
        int pos;
        /* Offset at which the next extended capability will be placed. */
        uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;

        pos = pcie_endpoint_cap_init(pci_dev, 0);
        assert(pos > 0);

        /* From here on, pos is the offset of the power management cap. */
        pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
                                 PCI_PM_SIZEOF, errp);
        if (pos < 0) {
            return;
        }

        /* Remembered so that reset can locate the PM control register. */
        pci_dev->exp.pm_cap = pos;

        /*
         * Indicates that this function complies with revision 1.2 of the
         * PCI Power Management Interface Specification.
         */
        pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);

        if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
            /*
             * NOTE(review): the return value is ignored and no Error
             * pointer is passed, so a failure here would go unnoticed --
             * confirm that pcie_aer_init() cannot fail for this device.
             */
            pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset,
                          PCI_ERR_SIZEOF, NULL);
            last_pcie_cap_offset += PCI_ERR_SIZEOF;
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
            /* Init error enabling flags */
            pcie_cap_deverr_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_LNKCTL) {
            /* Init Link Control Register */
            pcie_cap_lnkctl_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
            /* Init Power Management Control Register */
            pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
                         PCI_PM_CTRL_STATE_MASK);
        }

        /* ATS is placed after AER when both are enabled. */
        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
            pcie_ats_init(pci_dev, last_pcie_cap_offset,
                          proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
            last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
            /* Set Function Level Reset capability bit */
            pcie_cap_flr_init(pci_dev);
        }
    } else {
        /*
         * make future invocations of pci_is_express() return false
         * and pci_config_size() return PCI_CONFIG_SPACE_SIZE.
         */
        pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
    }

    /* Create the virtio bus embedded in the proxy, then let the subclass
     * finish realization. */
    virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
    if (k->realize) {
        k->realize(proxy, errp);
    }
}
1881
1882static void virtio_pci_exit(PCIDevice *pci_dev)
1883{
1884    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
1885    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
1886                     !pci_bus_is_root(pci_get_bus(pci_dev));
1887
1888    msix_uninit_exclusive_bar(pci_dev);
1889    if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port &&
1890        pci_is_express(pci_dev)) {
1891        pcie_aer_exit(pci_dev);
1892    }
1893}
1894
1895static void virtio_pci_reset(DeviceState *qdev)
1896{
1897    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
1898    VirtioBusState *bus = VIRTIO_BUS(&proxy->bus);
1899    PCIDevice *dev = PCI_DEVICE(qdev);
1900    int i;
1901
1902    virtio_pci_stop_ioeventfd(proxy);
1903    virtio_bus_reset(bus);
1904    msix_unuse_all_vectors(&proxy->pci_dev);
1905
1906    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1907        proxy->vqs[i].enabled = 0;
1908        proxy->vqs[i].num = 0;
1909        proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
1910        proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
1911        proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
1912    }
1913
1914    if (pci_is_express(dev)) {
1915        pcie_cap_deverr_reset(dev);
1916        pcie_cap_lnkctl_reset(dev);
1917
1918        pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0);
1919    }
1920}
1921
/*
 * qdev properties common to every virtio-pci proxy (installed by
 * virtio_pci_class_init).  All but "x-ignore-backend-features" map onto a
 * single bit of VirtIOPCIProxy.flags; the last macro argument is the
 * property's default value.
 */
static Property virtio_pci_properties[] = {
    DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
    DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
    DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
    DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
    DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
    /* The only entry backed by its own bool field rather than a flag bit. */
    DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
                     ignore_backend_features, false),
    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_ATS_BIT, false),
    DEFINE_PROP_BIT("x-ats-page-aligned", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, true),
    /* The x-pcie-*-init bits gate the matching init steps in realize. */
    DEFINE_PROP_BIT("x-pcie-deverr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, true),
    DEFINE_PROP_BIT("x-pcie-lnkctl-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true),
    DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
    DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
    DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_AER_BIT, false),
    DEFINE_PROP_END_OF_LIST(),
};
1951
1952static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
1953{
1954    VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev);
1955    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
1956    PCIDevice *pci_dev = &proxy->pci_dev;
1957
1958    if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
1959        virtio_pci_modern(proxy)) {
1960        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
1961    }
1962
1963    vpciklass->parent_dc_realize(qdev, errp);
1964}
1965
1966static void virtio_pci_class_init(ObjectClass *klass, void *data)
1967{
1968    DeviceClass *dc = DEVICE_CLASS(klass);
1969    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1970    VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
1971
1972    device_class_set_props(dc, virtio_pci_properties);
1973    k->realize = virtio_pci_realize;
1974    k->exit = virtio_pci_exit;
1975    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
1976    k->revision = VIRTIO_PCI_ABI_VERSION;
1977    k->class_id = PCI_CLASS_OTHERS;
1978    device_class_set_parent_realize(dc, virtio_pci_dc_realize,
1979                                    &vpciklass->parent_dc_realize);
1980    dc->reset = virtio_pci_reset;
1981}
1982
/*
 * QOM description of the abstract virtio-pci base type.  It derives from
 * the generic PCI device type, and its instances are VirtIOPCIProxy
 * structures.
 */
static const TypeInfo virtio_pci_info = {
    .name          = TYPE_VIRTIO_PCI,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(VirtIOPCIProxy),
    .class_init    = virtio_pci_class_init,
    .class_size    = sizeof(VirtioPCIClass),
    .abstract      = true,
};
1991
/*
 * Properties installed by virtio_pci_generic_class_init: they let the user
 * choose legacy and/or modern mode.  "disable-legacy" defaults to auto,
 * which virtio_pci_realize resolves according to the bus the device is
 * plugged into.
 */
static Property virtio_pci_generic_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
                            ON_OFF_AUTO_AUTO),
    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
    DEFINE_PROP_END_OF_LIST(),
};
1998
1999static void virtio_pci_base_class_init(ObjectClass *klass, void *data)
2000{
2001    const VirtioPCIDeviceTypeInfo *t = data;
2002    if (t->class_init) {
2003        t->class_init(klass, NULL);
2004    }
2005}
2006
2007static void virtio_pci_generic_class_init(ObjectClass *klass, void *data)
2008{
2009    DeviceClass *dc = DEVICE_CLASS(klass);
2010
2011    device_class_set_props(dc, virtio_pci_generic_properties);
2012}
2013
2014static void virtio_pci_transitional_instance_init(Object *obj)
2015{
2016    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
2017
2018    proxy->disable_legacy = ON_OFF_AUTO_OFF;
2019    proxy->disable_modern = false;
2020}
2021
2022static void virtio_pci_non_transitional_instance_init(Object *obj)
2023{
2024    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
2025
2026    proxy->disable_legacy = ON_OFF_AUTO_ON;
2027    proxy->disable_modern = false;
2028}
2029
2030void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
2031{
2032    char *base_name = NULL;
2033    TypeInfo base_type_info = {
2034        .name          = t->base_name,
2035        .parent        = t->parent ? t->parent : TYPE_VIRTIO_PCI,
2036        .instance_size = t->instance_size,
2037        .instance_init = t->instance_init,
2038        .class_size    = t->class_size,
2039        .abstract      = true,
2040        .interfaces    = t->interfaces,
2041    };
2042    TypeInfo generic_type_info = {
2043        .name = t->generic_name,
2044        .parent = base_type_info.name,
2045        .class_init = virtio_pci_generic_class_init,
2046        .interfaces = (InterfaceInfo[]) {
2047            { INTERFACE_PCIE_DEVICE },
2048            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
2049            { }
2050        },
2051    };
2052
2053    if (!base_type_info.name) {
2054        /* No base type -> register a single generic device type */
2055        /* use intermediate %s-base-type to add generic device props */
2056        base_name = g_strdup_printf("%s-base-type", t->generic_name);
2057        base_type_info.name = base_name;
2058        base_type_info.class_init = virtio_pci_generic_class_init;
2059
2060        generic_type_info.parent = base_name;
2061        generic_type_info.class_init = virtio_pci_base_class_init;
2062        generic_type_info.class_data = (void *)t;
2063
2064        assert(!t->non_transitional_name);
2065        assert(!t->transitional_name);
2066    } else {
2067        base_type_info.class_init = virtio_pci_base_class_init;
2068        base_type_info.class_data = (void *)t;
2069    }
2070
2071    type_register(&base_type_info);
2072    if (generic_type_info.name) {
2073        type_register(&generic_type_info);
2074    }
2075
2076    if (t->non_transitional_name) {
2077        const TypeInfo non_transitional_type_info = {
2078            .name          = t->non_transitional_name,
2079            .parent        = base_type_info.name,
2080            .instance_init = virtio_pci_non_transitional_instance_init,
2081            .interfaces = (InterfaceInfo[]) {
2082                { INTERFACE_PCIE_DEVICE },
2083                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
2084                { }
2085            },
2086        };
2087        type_register(&non_transitional_type_info);
2088    }
2089
2090    if (t->transitional_name) {
2091        const TypeInfo transitional_type_info = {
2092            .name          = t->transitional_name,
2093            .parent        = base_type_info.name,
2094            .instance_init = virtio_pci_transitional_instance_init,
2095            .interfaces = (InterfaceInfo[]) {
2096                /*
2097                 * Transitional virtio devices work only as Conventional PCI
2098                 * devices because they require PIO ports.
2099                 */
2100                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
2101                { }
2102            },
2103        };
2104        type_register(&transitional_type_info);
2105    }
2106    g_free(base_name);
2107}
2108
2109unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues)
2110{
2111    /*
2112     * 1:1 vq to vCPU mapping is ideal because the same vCPU that submitted
2113     * virtqueue buffers can handle their completion. When a different vCPU
2114     * handles completion it may need to IPI the vCPU that submitted the
2115     * request and this adds overhead.
2116     *
2117     * Virtqueues consume guest RAM and MSI-X vectors. This is wasteful in
2118     * guests with very many vCPUs and a device that is only used by a few
2119     * vCPUs. Unfortunately optimizing that case requires manual pinning inside
2120     * the guest, so those users might as well manually set the number of
2121     * queues. There is no upper limit that can be applied automatically and
2122     * doing so arbitrarily would result in a sudden performance drop once the
2123     * threshold number of vCPUs is exceeded.
2124     */
2125    unsigned num_queues = current_machine->smp.cpus;
2126
2127    /*
2128     * The maximum number of MSI-X vectors is PCI_MSIX_FLAGS_QSIZE + 1, but the
2129     * config change interrupt and the fixed virtqueues must be taken into
2130     * account too.
2131     */
2132    num_queues = MIN(num_queues, PCI_MSIX_FLAGS_QSIZE - fixed_queues);
2133
2134    /*
2135     * There is a limit to how many virtqueues a device can have.
2136     */
2137    return MIN(num_queues, VIRTIO_QUEUE_MAX - fixed_queues);
2138}
2139
2140/* virtio-pci-bus */
2141
2142static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
2143                               VirtIOPCIProxy *dev)
2144{
2145    DeviceState *qdev = DEVICE(dev);
2146    char virtio_bus_name[] = "virtio-bus";
2147
2148    qbus_create_inplace(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev,
2149                        virtio_bus_name);
2150}
2151
2152static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
2153{
2154    BusClass *bus_class = BUS_CLASS(klass);
2155    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
2156    bus_class->max_dev = 1;
2157    k->notify = virtio_pci_notify;
2158    k->save_config = virtio_pci_save_config;
2159    k->load_config = virtio_pci_load_config;
2160    k->save_queue = virtio_pci_save_queue;
2161    k->load_queue = virtio_pci_load_queue;
2162    k->save_extra_state = virtio_pci_save_extra_state;
2163    k->load_extra_state = virtio_pci_load_extra_state;
2164    k->has_extra_state = virtio_pci_has_extra_state;
2165    k->query_guest_notifiers = virtio_pci_query_guest_notifiers;
2166    k->set_guest_notifiers = virtio_pci_set_guest_notifiers;
2167    k->set_host_notifier_mr = virtio_pci_set_host_notifier_mr;
2168    k->vmstate_change = virtio_pci_vmstate_change;
2169    k->pre_plugged = virtio_pci_pre_plugged;
2170    k->device_plugged = virtio_pci_device_plugged;
2171    k->device_unplugged = virtio_pci_device_unplugged;
2172    k->query_nvectors = virtio_pci_query_nvectors;
2173    k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
2174    k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
2175    k->get_dma_as = virtio_pci_get_dma_as;
2176    k->queue_enabled = virtio_pci_queue_enabled;
2177}
2178
/*
 * QOM description of the virtio-pci bus type, a child of the generic
 * virtio bus type whose class is set up by virtio_pci_bus_class_init.
 */
static const TypeInfo virtio_pci_bus_info = {
    .name          = TYPE_VIRTIO_PCI_BUS,
    .parent        = TYPE_VIRTIO_BUS,
    .instance_size = sizeof(VirtioPCIBusState),
    .class_size    = sizeof(VirtioPCIBusClass),
    .class_init    = virtio_pci_bus_class_init,
};
2186
2187static void virtio_pci_register_types(void)
2188{
2189    /* Base types: */
2190    type_register_static(&virtio_pci_bus_info);
2191    type_register_static(&virtio_pci_info);
2192}
2193
2194type_init(virtio_pci_register_types)
2195
2196