qemu/hw/virtio/virtio.c
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIOHandleOutput handle_aio_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    QLIST_ENTRY(VirtQueue) node;
};

/* virt queue functions */
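/* Recompute the avail and used ring addresses from the descriptor table
 * address.  Legacy (pre virtio-1) transports only give us the descriptor
 * table address; the split ring layout is then implied: the avail ring
 * follows the descriptor table directly, and the used ring starts at the
 * next vring.align boundary after the avail ring.
 */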
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->desc) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
}

static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            hwaddr desc_pa, int i)
{
    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
    return vq->shadow_avail_idx;
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    hwaddr pa;
    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
                        (void *)uelem, sizeof(VRingUsedElem));
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    virtio_stw_phys(vq->vdev, pa, val);
    vq->used_idx = val;
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
}

static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys(vq->vdev, pa, val);
}

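/* Enable or disable guest->host notifications for this queue.  With
 * VIRTIO_RING_F_EVENT_IDX, suppression is expressed by publishing the
 * last avail index we have seen as the "avail event"; without it, the
 * coarser VRING_USED_F_NO_NOTIFY flag in the used ring is used instead.
 */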
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
int virtio_queue_empty(VirtQueue *vq)
{
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++) {
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);
    }
}

/* virtqueue_detach_element:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Detach the element from the virtqueue.  This function is suitable for device
 * reset or other situations where a #VirtQueueElement is simply freed and will
 * not be pushed or discarded.
 */
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                              unsigned int len)
{
    vq->inuse--;
    virtqueue_unmap_sg(vq, elem, len);
}

/* virtqueue_unpop:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to virtqueue_pop() will refetch the element.
 */
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
{
    vq->last_avail_idx--;
    virtqueue_detach_element(vq, elem, len);
}

/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: Number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    if (num > vq->inuse) {
        return false;
    }
    vq->last_avail_idx -= num;
    vq->inuse -= num;
    return true;
}

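/* virtqueue_fill:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 * @idx: offset from the current used index for this element
 *
 * Unmap the element and write it into the used ring without updating the
 * used index; the entry only becomes visible to the guest once
 * virtqueue_flush() publishes the new index.
 */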
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    if (unlikely(vq->vdev->broken)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(vq->vdev->broken)) {
        vq->inuse -= count;
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}

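/* virtqueue_push:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Return a single element to the guest: virtqueue_fill() followed by
 * virtqueue_flush() for one entry.  A device completing several requests
 * at once can amortize the barrier and index update by filling first and
 * flushing once, e.g. (illustrative sketch only, elems/lens are
 * hypothetical):
 *
 *     for (i = 0; i < n; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, n);
 */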
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                               unsigned int *head)
{
    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        virtio_error(vq->vdev, "Guest says index %u is available", *head);
        return false;
    }

    return true;
}

enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};

static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                    hwaddr desc_pa, unsigned int max,
                                    unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        virtio_error(vdev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_desc_read(vdev, desc, desc_pa, *next);
    return VIRTQUEUE_READ_DESC_MORE;
}

void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    int rc;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        VRingDesc desc;
        hwaddr desc_pa;
        unsigned int i;

        max = vq->vring.num;
        num_bufs = total_bufs;

        if (!virtqueue_get_head(vq, idx++, &i)) {
            goto err;
        }

        desc_pa = vq->vring.desc;
        vring_desc_read(vdev, &desc, desc_pa, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingDesc)) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = desc.len / sizeof(VRingDesc);
            desc_pa = desc.addr;
            num_bufs = i = 0;
            vring_desc_read(vdev, &desc, desc_pa, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }

    if (rc < 0) {
        goto err;
    }

done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

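/* Map the guest buffer [pa, pa + sz) for I/O, appending the mappings to
 * @iov/@addr starting at *p_num_sg.  One descriptor can span several
 * MemoryRegions, so a single buffer may produce multiple iovec entries;
 * the loop keeps remapping whatever tail cpu_physical_memory_map() could
 * not cover in one go.
 */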
static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                               hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    bool ok = false;
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    if (!sz) {
        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
        goto out;
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            virtio_error(vdev, "virtio: too many write descriptors in "
                               "indirect table");
            goto out;
        }

        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
        if (!iov[num_sg].iov_base) {
            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
            goto out;
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    ok = true;

out:
    *p_num_sg = num_sg;
    return ok;
}

/* Only used by error code paths before we have a VirtQueueElement (therefore
 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
 * yet.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int i;

    for (i = 0; i < out_num + in_num; i++) {
        int is_write = i >= out_num;

        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
        iov++;
    }
}

static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
                                unsigned int *num_sg, unsigned int max_size,
                                int is_write)
{
    unsigned int i;
    hwaddr len;

    /* Note: this function MUST validate input, some callers
     * are passing in num_sg values received over the network.
     */
    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * When we do, we might be able to re-enable NDEBUG below.
     */
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
    assert(*num_sg <= max_size);

    for (i = 0; i < *num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

void virtqueue_map(VirtQueueElement *elem)
{
    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
                        VIRTQUEUE_MAX_SIZE, 1);
    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
                        VIRTQUEUE_MAX_SIZE, 0);
}

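/* Allocate a VirtQueueElement and its in/out address and iovec arrays in
 * one contiguous block:
 *
 *     [element struct][in_addr array][out_addr array][in_sg array][out_sg array]
 *
 * with alignment padding between the groups, so a single g_free() in the
 * caller releases everything.
 */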
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}

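/* virtqueue_pop:
 * @vq: The #VirtQueue
 * @sz: size of the device-specific element struct; must be at least
 *      sizeof(VirtQueueElement)
 *
 * Fetch the next available descriptor chain, map it into host memory and
 * return it as a freshly allocated element, or NULL if the queue is
 * empty, the device is broken, or the chain is malformed.  A typical
 * handle_output callback drains the queue roughly like this
 * (illustrative sketch only):
 *
 *     VirtQueueElement *elem;
 *     while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *         ... read elem->out_sg, fill elem->in_sg ...
 *         virtqueue_push(vq, elem, len);
 *         g_free(elem);
 *     }
 *     virtio_notify(vdev, vq);
 */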
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem;
    unsigned out_num, in_num;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    if (unlikely(vdev->broken)) {
        return NULL;
    }
    if (virtio_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are no input or output buffers yet. */
    out_num = in_num = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        return NULL;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;
    vring_desc_read(vdev, &desc, desc_pa, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (desc.len % sizeof(VRingDesc)) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            return NULL;
        }

        /* loop over the indirect descriptor table */
        max = desc.len / sizeof(VRingDesc);
        desc_pa = desc.addr;
        i = 0;
        vring_desc_read(vdev, &desc, desc_pa, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    return NULL;
}

/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;

void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    virtqueue_map(elem);
    return elem;
}

void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map when loading.  Do not
         * save it, as it would leak the QEMU address space layout.  */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Do not save iov_base as above.  */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }
    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (unlikely(vdev->broken)) {
        return;
    }

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (k->validate_features) {
        return k->validate_features(vdev);
    } else {
        return 0;
    }
}

int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }
    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
    return 0;
}

bool target_words_bigendian(void);
static enum virtio_device_endian virtio_default_endian(void)
{
    if (target_words_bigendian()) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cc = CPU_GET_CLASS(current_cpu);

    if (cc->virtio_is_big_endian(current_cpu)) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->broken = false;
    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    atomic_set(&vdev->isr, 0);
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].shadow_avail_idx = 0;
        vdev->vq[i].used_idx = 0;
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
        vdev->vq[i].inuse = 0;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_le_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_le_p(vdev->config + addr);
    return val;
}

void virtio_config_modern_writeb(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writew(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writel(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    vdev->vq[n].vring.desc = desc;
    vdev->vq[n].vring.avail = avail;
    vdev->vq[n].vring.used = used;
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
}

VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_get_num_queues(VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (!virtio_queue_get_num(vdev, i)) {
            break;
        }
    }

    return i;
}

void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }
    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtio_queue_update_rings(vdev, n);
}

static void virtio_queue_notify_aio_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_aio_output) {
        VirtIODevice *vdev = vq->vdev;

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_aio_output(vdev, vq);
    }
}

static void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        if (unlikely(vdev->broken)) {
            return;
        }

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n < VIRTIO_QUEUE_MAX) {
        if (vdev->vector_queues &&
            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
            QLIST_REMOVE(vq, node);
        }
        vdev->vq[n].vector = vector;
        if (vdev->vector_queues &&
            vector != VIRTIO_NO_VECTOR) {
            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
        }
    }
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
        abort();
    }

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].handle_aio_output = NULL;

    return &vdev->vq[i];
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
    vdev->vq[n].vring.num_default = 0;
}

static void virtio_set_isr(VirtIODevice *vdev, int value)
{
    uint8_t old = atomic_read(&vdev->isr);

    /* Do not write ISR if it does not change, so that its cacheline remains
     * shared in the common case where the guest does not read it.
     */
    if ((old & value) != value) {
        atomic_or(&vdev->isr, value);
    }
}

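/* Decide whether the guest needs to be notified about this queue.  With
 * VIRTIO_RING_F_EVENT_IDX the guest publishes a "used event" index and we
 * signal only if the new used index crossed it since the last
 * notification; vring_need_event() implements the wraparound-safe check
 * (uint16_t)(new - event - 1) < (uint16_t)(new - old).  Without the
 * feature, the plain VRING_AVAIL_F_NO_INTERRUPT flag applies.
 */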
bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when the queue is empty, if the feature was negotiated */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}

void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!virtio_should_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify_irqfd(vdev, vq);

    /*
     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
     * incorrectly polling this bit during crashdump and hibernation
     * in MSI mode, causing a hang if this bit is never updated.
     * Recent releases of Windows do not really shut down, but rather
     * log out and hibernate to make the next startup faster.  Hence,
     * this manifested as a more serious hang during shutdown.
     *
     * Next driver release from 2016 fixed this problem, so working around it
     * is not a must, but it's easy to do so let's do it here.
     *
     * Note: it's safe to update ISR from any thread as it was switched
     * to an atomic operation.
     */
    virtio_set_isr(vq->vdev, 0x1);
    event_notifier_set(&vq->guest_notifier);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!virtio_should_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    virtio_set_isr(vq->vdev, 0x1);
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_set_isr(vdev, 0x3);
    vdev->generation++;
    virtio_notify_vector(vdev, vdev->config_vector);
}

static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
}

static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return (vdev->host_features >> 32) != 0;
}

static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}

static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
            return true;
        }
    }
    return false;
}

static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    return k->has_extra_state &&
        k->has_extra_state(qbus->parent);
}

static bool virtio_broken_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->broken;
}

static const VMStateDescription vmstate_virtqueue = {
    .name = "virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_ringsize = {
    .name = "ringsize_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static int get_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (!k->load_extra_state) {
        return -1;
    } else {
        return k->load_extra_state(qbus->parent, f);
    }
}

static void put_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    k->save_extra_state(qbus->parent, f);
}

static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};

static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            .name         = "extra_state",
            .version_id   = 0,
            .field_exists = NULL,
            .size         = 0,
            .info         = &vmstate_info_extra_state,
            .flags        = VMS_SINGLE,
            .offset       = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_device_endian_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_64bit_features_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_broken = {
    .name = "virtio/broken",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_broken_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(broken, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

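/* The main vmstate description carries no fields of its own: everything
 * newer than the original hand-rolled format travels in the subsections,
 * each emitted only when its .needed callback returns true, so older
 * destinations keep working when the corresponding feature is not in use.
 */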
static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_broken,
        &vmstate_virtio_extra_state,
        NULL
    }
};

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        /* XXX virtio-1 devices */
        qemu_put_be64(f, vdev->vq[i].vring.desc);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    if (vdc->vmsd) {
        vmstate_save_state(f, vdc->vmsd, vdev, NULL);
    }

    /* Subsections */
    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}

/* A wrapper for use as a VMState .put function */
static void virtio_device_put(QEMUFile *f, void *opaque, size_t size)
{
    virtio_save(VIRTIO_DEVICE(opaque), f);
}

/* A wrapper for use as a VMState .get function */
static int virtio_device_get(QEMUFile *f, void *opaque, size_t size)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));

    return virtio_load(vdev, f, dc->vmsd->version_id);
}

const VMStateInfo virtio_vmstate_info = {
    .name = "virtio",
    .get = virtio_device_get,
    .put = virtio_device_put,
};

static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    bool bad = (val & ~(vdev->host_features)) != 0;

    val &= vdev->host_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    /*
     * The driver must not attempt to set features after feature negotiation
     * has finished.
     */
    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
        return -EINVAL;
    }
    return virtio_set_features_nocheck(vdev, val);
}

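/* Load the common virtio state in the order virtio_save() wrote it:
 * transport config, common device fields, per-queue state, then the
 * device-specific payload and finally the vmstate subsections, with
 * consistency checks on the ring indices at the end.
 */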
1704int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1705{
1706    int i, ret;
1707    int32_t config_len;
1708    uint32_t num;
1709    uint32_t features;
1710    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1711    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1712    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1713
1714    /*
1715     * We poison the endianness to ensure it does not get used before
1716     * subsections have been loaded.
1717     */
1718    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1719
1720    if (k->load_config) {
1721        ret = k->load_config(qbus->parent, f);
1722        if (ret)
1723            return ret;
1724    }
1725
1726    qemu_get_8s(f, &vdev->status);
1727    qemu_get_8s(f, &vdev->isr);
1728    qemu_get_be16s(f, &vdev->queue_sel);
1729    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1730        return -1;
1731    }
1732    qemu_get_be32s(f, &features);
1733
1734    /*
1735     * Temporarily set guest_features low bits - needed by
1736     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
1737     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
1738     *
1739     * Note: devices should always test host features in future - don't create
1740     * new dependencies like this.
1741     */
1742    vdev->guest_features = features;
1743
1744    config_len = qemu_get_be32(f);
1745
1746    /*
1747     * There are cases where the incoming config can be bigger or smaller
1748     * than what we have; so load what we have space for, and skip
1749     * any excess that's in the stream.
1750     */
1751    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1752
1753    while (config_len > vdev->config_len) {
1754        qemu_get_byte(f);
1755        config_len--;
1756    }
1757
1758    num = qemu_get_be32(f);
1759
1760    if (num > VIRTIO_QUEUE_MAX) {
1761        error_report("Invalid number of virtqueues: 0x%x", num);
1762        return -1;
1763    }
1764
1765    for (i = 0; i < num; i++) {
1766        vdev->vq[i].vring.num = qemu_get_be32(f);
1767        if (k->has_variable_vring_alignment) {
1768            vdev->vq[i].vring.align = qemu_get_be32(f);
1769        }
1770        vdev->vq[i].vring.desc = qemu_get_be64(f);
1771        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1772        vdev->vq[i].signalled_used_valid = false;
1773        vdev->vq[i].notification = true;
1774
1775        if (vdev->vq[i].vring.desc) {
1776            /* XXX virtio-1 devices */
1777            virtio_queue_update_rings(vdev, i);
1778        } else if (vdev->vq[i].last_avail_idx) {
1779            error_report("VQ %d address 0x0 "
1780                         "inconsistent with Host index 0x%x",
1781                         i, vdev->vq[i].last_avail_idx);
1782                return -1;
1783        }
1784        if (k->load_queue) {
1785            ret = k->load_queue(qbus->parent, i, f);
1786            if (ret)
1787                return ret;
1788        }
1789    }
1790
1791    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1792
1793    if (vdc->load != NULL) {
1794        ret = vdc->load(vdev, f, version_id);
1795        if (ret) {
1796            return ret;
1797        }
1798    }
1799
1800    if (vdc->vmsd) {
1801        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
1802        if (ret) {
1803            return ret;
1804        }
1805    }
1806
1807    /* Subsections */
1808    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1809    if (ret) {
1810        return ret;
1811    }
1812
1813    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1814        vdev->device_endian = virtio_default_endian();
1815    }
1816
1817    if (virtio_64bit_features_needed(vdev)) {
1818        /*
1819         * Subsection load filled vdev->guest_features.  Run them
1820         * through virtio_set_features to sanity-check them against
1821         * host_features.
1822         */
1823        uint64_t features64 = vdev->guest_features;
1824        if (virtio_set_features_nocheck(vdev, features64) < 0) {
1825            error_report("Features 0x%" PRIx64 " unsupported. "
1826                         "Allowed features: 0x%" PRIx64,
1827                         features64, vdev->host_features);
1828            return -1;
1829        }
1830    } else {
1831        if (virtio_set_features_nocheck(vdev, features) < 0) {
1832            error_report("Features 0x%x unsupported. "
1833                         "Allowed features: 0x%" PRIx64,
1834                         features, vdev->host_features);
1835            return -1;
1836        }
1837    }
1838
1839    for (i = 0; i < num; i++) {
1840        if (vdev->vq[i].vring.desc) {
1841            uint16_t nheads;
1842            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1843            /* Check it isn't doing strange things with descriptor numbers. */
1844            if (nheads > vdev->vq[i].vring.num) {
1845                error_report("VQ %d size 0x%x Guest index 0x%x "
1846                             "inconsistent with Host index 0x%x: delta 0x%x",
1847                             i, vdev->vq[i].vring.num,
1848                             vring_avail_idx(&vdev->vq[i]),
1849                             vdev->vq[i].last_avail_idx, nheads);
1850                return -1;
1851            }
1852            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
1853            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
1854
1855            /*
1856             * Some devices migrate VirtQueueElements that have been popped
1857             * from the avail ring but not yet returned to the used ring.
1858             */
1859            vdev->vq[i].inuse = vdev->vq[i].last_avail_idx -
1860                                vdev->vq[i].used_idx;
1861            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
1862                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
1863                             "used_idx 0x%x",
1864                             i, vdev->vq[i].vring.num,
1865                             vdev->vq[i].last_avail_idx,
1866                             vdev->vq[i].used_idx);
1867                return -1;
1868            }
1869        }
1870    }
1871
1872    return 0;
1873}
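/*
 * Illustrative only, not part of the original file: a transport's
 * migration hook would typically feed the incoming stream to
 * virtio_load() like the hypothetical caller below (all names here are
 * invented for the sketch).
 *
 *     static int virtio_foo_load(QEMUFile *f, void *opaque, int version_id)
 *     {
 *         VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
 *
 *         return virtio_load(vdev, f, version_id);
 *     }
 */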
1874
1875void virtio_cleanup(VirtIODevice *vdev)
1876{
1877    qemu_del_vm_change_state_handler(vdev->vmstate);
1878    g_free(vdev->config);
1879    g_free(vdev->vq);
1880    g_free(vdev->vector_queues);
1881}
1882
1883static void virtio_vmstate_change(void *opaque, int running, RunState state)
1884{
1885    VirtIODevice *vdev = opaque;
1886    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1887    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1888    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1889    vdev->vm_running = running;
1890
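    /* Ordering note: when resuming, restore the device status before the
     * transport's vmstate_change hook starts the backend; when stopping,
     * run the hook first so the backend is quiesced before the status
     * write below. */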
1891    if (backend_run) {
1892        virtio_set_status(vdev, vdev->status);
1893    }
1894
1895    if (k->vmstate_change) {
1896        k->vmstate_change(qbus->parent, backend_run);
1897    }
1898
1899    if (!backend_run) {
1900        virtio_set_status(vdev, vdev->status);
1901    }
1902}
1903
1904void virtio_instance_init_common(Object *proxy_obj, void *data,
1905                                 size_t vdev_size, const char *vdev_name)
1906{
1907    DeviceState *vdev = data;
1908
1909    object_initialize(vdev, vdev_size, vdev_name);
1910    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1911    object_unref(OBJECT(vdev));
1912    qdev_alias_all_properties(vdev, proxy_obj);
1913}
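/*
 * Hypothetical usage sketch (type names invented): a proxy object, e.g. a
 * PCI transport, embeds the backend device and delegates to the helper
 * above from its own instance_init:
 *
 *     static void virtio_foo_pci_instance_init(Object *obj)
 *     {
 *         VirtIOFooPCI *dev = VIRTIO_FOO_PCI(obj);
 *
 *         virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 *                                     TYPE_VIRTIO_FOO);
 *     }
 */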
1914
1915void virtio_init(VirtIODevice *vdev, const char *name,
1916                 uint16_t device_id, size_t config_size)
1917{
1918    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1919    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1920    int i;
1921    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
1922
1923    if (nvectors) {
1924        vdev->vector_queues =
1925            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
1926    }
1927
1928    vdev->device_id = device_id;
1929    vdev->status = 0;
1930    atomic_set(&vdev->isr, 0);
1931    vdev->queue_sel = 0;
1932    vdev->config_vector = VIRTIO_NO_VECTOR;
1933    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
1934    vdev->vm_running = runstate_is_running();
1935    vdev->broken = false;
1936    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1937        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1938        vdev->vq[i].vdev = vdev;
1939        vdev->vq[i].queue_index = i;
1940    }
1941
1942    vdev->name = name;
1943    vdev->config_len = config_size;
1944    if (vdev->config_len) {
1945        vdev->config = g_malloc0(config_size);
1946    } else {
1947        vdev->config = NULL;
1948    }
1949    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1950                                                     vdev);
1951    vdev->device_endian = virtio_default_endian();
1952    vdev->use_guest_notifier_mask = true;
1953}
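/*
 * Sketch of a device realize path (device name, ID and sizes are
 * hypothetical): after virtio_init() the device registers its virtqueues,
 * which fills in the vq[] slots allocated above:
 *
 *     virtio_init(vdev, "virtio-foo", VIRTIO_ID_FOO,
 *                 sizeof(struct virtio_foo_config));
 *     virtio_add_queue(vdev, 128, virtio_foo_handle_output);
 */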
1954
1955hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1956{
1957    return vdev->vq[n].vring.desc;
1958}
1959
1960hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1961{
1962    return vdev->vq[n].vring.avail;
1963}
1964
1965hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1966{
1967    return vdev->vq[n].vring.used;
1968}
1969
1970hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1971{
1972    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1973}
1974
1975hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1976{
1977    return offsetof(VRingAvail, ring) +
1978        sizeof(uint16_t) * vdev->vq[n].vring.num;
1979}
1980
1981hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1982{
1983    return offsetof(VRingUsed, ring) +
1984        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1985}
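/*
 * Worked example: with num = 256, the split ring occupies
 * 16 * 256 = 4096 bytes of descriptor table, 4 + 2 * 256 = 516 bytes of
 * avail ring and 4 + 8 * 256 = 2052 bytes of used ring.  The trailing
 * used_event/avail_event words (VIRTIO_RING_F_EVENT_IDX) are not counted
 * by these helpers.
 */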
1986
1987uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1988{
1989    return vdev->vq[n].last_avail_idx;
1990}
1991
1992void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1993{
1994    vdev->vq[n].last_avail_idx = idx;
1995    vdev->vq[n].shadow_avail_idx = idx;
1996}
1997
1998void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1999{
2000    vdev->vq[n].signalled_used_valid = false;
2001}
2002
2003VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
2004{
2005    return vdev->vq + n;
2006}
2007
2008uint16_t virtio_get_queue_index(VirtQueue *vq)
2009{
2010    return vq->queue_index;
2011}
2012
2013static void virtio_queue_guest_notifier_read(EventNotifier *n)
2014{
2015    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
2016    if (event_notifier_test_and_clear(n)) {
2017        virtio_notify_vector(vq->vdev, vq->vector);
2018    }
2019}
2020
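/* When irqfd is in use the guest notifier is consumed in the kernel (the
 * interrupt is injected directly), so no userspace read handler is
 * installed in that case. */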
2021void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
2022                                                bool with_irqfd)
2023{
2024    if (assign && !with_irqfd) {
2025        event_notifier_set_handler(&vq->guest_notifier, false,
2026                                   virtio_queue_guest_notifier_read);
2027    } else {
2028        event_notifier_set_handler(&vq->guest_notifier, false, NULL);
2029    }
2030    if (!assign) {
2031        /* Test and clear notifier before closing it,
2032         * in case the poll callback didn't have time to run. */
2033        virtio_queue_guest_notifier_read(&vq->guest_notifier);
2034    }
2035}
2036
2037EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
2038{
2039    return &vq->guest_notifier;
2040}
2041
2042static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
2043{
2044    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2045    if (event_notifier_test_and_clear(n)) {
2046        virtio_queue_notify_aio_vq(vq);
2047    }
2048}
2049
2050void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
2051                                                VirtIOHandleOutput handle_output)
2052{
2053    if (handle_output) {
2054        vq->handle_aio_output = handle_output;
2055        aio_set_event_notifier(ctx, &vq->host_notifier, true,
2056                               virtio_queue_host_notifier_aio_read);
2057    } else {
2058        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
2059        /* Test and clear notifier after disabling the event,
2060         * in case the poll callback didn't have time to run. */
2061        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
2062        vq->handle_aio_output = NULL;
2063    }
2064}
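/*
 * Hypothetical dataplane usage: a device attaches the queue to an
 * IOThread's AioContext when starting and passes NULL to detach when
 * stopping:
 *
 *     aio_context_acquire(ctx);
 *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, handle_output);
 *     ...
 *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, NULL);
 *     aio_context_release(ctx);
 */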
2065
2066void virtio_queue_host_notifier_read(EventNotifier *n)
2067{
2068    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2069    if (event_notifier_test_and_clear(n)) {
2070        virtio_queue_notify_vq(vq);
2071    }
2072}
2073
2074EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
2075{
2076    return &vq->host_notifier;
2077}
2078
2079void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
2080{
2081    g_free(vdev->bus_name);
2082    vdev->bus_name = g_strdup(bus_name);
2083}
2084
2085void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
2086{
2087    va_list ap;
2088
2089    va_start(ap, fmt);
2090    error_vreport(fmt, ap);
2091    va_end(ap);
2092
2093    vdev->broken = true;
2094
2095    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2096        virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
2097        virtio_notify_config(vdev);
2098    }
2099}
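/*
 * Illustrative call site (invented message): a device marks itself broken
 * when the guest hands it a malformed ring entry:
 *
 *     if (desc.len == 0) {
 *         virtio_error(vdev, "VQ %u: zero-length descriptor",
 *                      vq->queue_index);
 *         return NULL;
 *     }
 */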
2100
2101static void virtio_device_realize(DeviceState *dev, Error **errp)
2102{
2103    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2104    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2105    Error *err = NULL;
2106
2107    /* Devices should either use vmsd or the load/save methods */
2108    assert(!vdc->vmsd || !vdc->load);
2109
2110    if (vdc->realize != NULL) {
2111        vdc->realize(dev, &err);
2112        if (err != NULL) {
2113            error_propagate(errp, err);
2114            return;
2115        }
2116    }
2117
2118    virtio_bus_device_plugged(vdev, &err);
2119    if (err != NULL) {
2120        error_propagate(errp, err);
2121        return;
2122    }
2123}
2124
2125static void virtio_device_unrealize(DeviceState *dev, Error **errp)
2126{
2127    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2128    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2129    Error *err = NULL;
2130
2131    virtio_bus_device_unplugged(vdev);
2132
2133    if (vdc->unrealize != NULL) {
2134        vdc->unrealize(dev, &err);
2135        if (err != NULL) {
2136            error_propagate(errp, err);
2137            return;
2138        }
2139    }
2140
2141    g_free(vdev->bus_name);
2142    vdev->bus_name = NULL;
2143}
2144
2145static Property virtio_properties[] = {
2146    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
2147    DEFINE_PROP_END_OF_LIST(),
2148};
2149
2150static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
2151{
2152    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2153    int n, r, err;
2154
2155    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2156        VirtQueue *vq = &vdev->vq[n];
2157        if (!virtio_queue_get_num(vdev, n)) {
2158            continue;
2159        }
2160        r = virtio_bus_set_host_notifier(qbus, n, true);
2161        if (r < 0) {
2162            err = r;
2163            goto assign_error;
2164        }
2165        event_notifier_set_handler(&vq->host_notifier, true,
2166                                   virtio_queue_host_notifier_read);
2167    }
2168
2169    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2170        /* Kick right away to begin processing requests already in vring */
2171        VirtQueue *vq = &vdev->vq[n];
2172        if (!vq->vring.num) {
2173            continue;
2174        }
2175        event_notifier_set(&vq->host_notifier);
2176    }
2177    return 0;
2178
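    /* Unwind in reverse order: detach the handlers and release the host
     * notifiers that were successfully assigned before the failure. */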
2179assign_error:
2180    while (--n >= 0) {
2181        VirtQueue *vq = &vdev->vq[n];
2182        if (!virtio_queue_get_num(vdev, n)) {
2183            continue;
2184        }
2185
2186        event_notifier_set_handler(&vq->host_notifier, true, NULL);
2187        r = virtio_bus_set_host_notifier(qbus, n, false);
2188        assert(r >= 0);
2189    }
2190    return err;
2191}
2192
2193int virtio_device_start_ioeventfd(VirtIODevice *vdev)
2194{
2195    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2196    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2197
2198    return virtio_bus_start_ioeventfd(vbus);
2199}
2200
2201static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
2202{
2203    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2204    int n, r;
2205
2206    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2207        VirtQueue *vq = &vdev->vq[n];
2208
2209        if (!virtio_queue_get_num(vdev, n)) {
2210            continue;
2211        }
2212        event_notifier_set_handler(&vq->host_notifier, true, NULL);
2213        r = virtio_bus_set_host_notifier(qbus, n, false);
2214        assert(r >= 0);
2215    }
2216}
2217
2218void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
2219{
2220    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2221    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2222
2223    virtio_bus_stop_ioeventfd(vbus);
2224}
2225
2226int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
2227{
2228    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2229    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2230
2231    return virtio_bus_grab_ioeventfd(vbus);
2232}
2233
2234void virtio_device_release_ioeventfd(VirtIODevice *vdev)
2235{
2236    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2237    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2238
2239    virtio_bus_release_ioeventfd(vbus);
2240}
2241
2242static void virtio_device_class_init(ObjectClass *klass, void *data)
2243{
2244    /* Set the default class hooks and properties here. */
2245    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2246    DeviceClass *dc = DEVICE_CLASS(klass);
2247
2248    dc->realize = virtio_device_realize;
2249    dc->unrealize = virtio_device_unrealize;
2250    dc->bus_type = TYPE_VIRTIO_BUS;
2251    dc->props = virtio_properties;
2252    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
2253    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
2254
2255    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
2256}
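/*
 * Sketch of a concrete subclass (hypothetical names): a device class
 * layers its own hooks on top of the defaults installed above:
 *
 *     static void virtio_foo_class_init(ObjectClass *klass, void *data)
 *     {
 *         VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 *
 *         vdc->realize = virtio_foo_device_realize;
 *         vdc->unrealize = virtio_foo_device_unrealize;
 *         vdc->get_features = virtio_foo_get_features;
 *     }
 */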
2257
2258bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
2259{
2260    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2261    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2262
2263    return virtio_bus_ioeventfd_enabled(vbus);
2264}
2265
2266static const TypeInfo virtio_device_info = {
2267    .name = TYPE_VIRTIO_DEVICE,
2268    .parent = TYPE_DEVICE,
2269    .instance_size = sizeof(VirtIODevice),
2270    .class_init = virtio_device_class_init,
2271    .abstract = true,
2272    .class_size = sizeof(VirtioDeviceClass),
2273};
2274
2275static void virtio_register_types(void)
2276{
2277    type_register_static(&virtio_device_info);
2278}
2279
2280type_init(virtio_register_types)
2281