qemu/hw/virtio/virtio.c
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether the signalled_used value is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    unsigned int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIOHandleOutput handle_aio_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    QLIST_ENTRY(VirtQueue) node;
};

/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->desc) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
}
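
/*
 * Worked example (illustrative numbers only): for num = 256 with the default
 * PCI alignment of 4096, the layout computed above is
 *
 *   desc:  256 * sizeof(VRingDesc) = 256 * 16 = 4096 bytes
 *   avail: starts at desc + 4096; offsetof(VRingAvail, ring[256])
 *          = 2 + 2 + 2 * 256 = 516 bytes
 *   used:  starts at vring_align(avail + 516, 4096), i.e. rounded up to the
 *          next 4096-byte boundary after the avail ring
 *
 * matching the split-ring layout in the virtio specification.
 */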

static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            hwaddr desc_pa, int i)
{
    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
    return vq->shadow_avail_idx;
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    hwaddr pa;
    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
                       (void *)uelem, sizeof(VRingUsedElem));
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    virtio_stw_phys(vq->vdev, pa, val);
    vq->used_idx = val;
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
}

static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys(vq->vdev, pa, val);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
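
/*
 * Typical polling pattern (an illustrative sketch, not a contract of this
 * API): a device can suppress guest->host notifications while it drains the
 * ring, then re-enable them and re-check the ring to close the race with a
 * guest that added buffers in the meantime:
 *
 *     virtio_queue_set_notification(vq, 0);
 *     while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *         ... process elem, virtqueue_push(), g_free(elem) ...
 *     }
 *     virtio_queue_set_notification(vq, 1);
 *     ... if !virtio_queue_empty(vq), go around again ...
 */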

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
int virtio_queue_empty(VirtQueue *vq)
{
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++) {
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);
    }
}

/* virtqueue_detach_element:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Detach the element from the virtqueue.  This function is suitable for device
 * reset or other situations where a #VirtQueueElement is simply freed and will
 * not be pushed or discarded.
 */
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                              unsigned int len)
{
    vq->inuse--;
    virtqueue_unmap_sg(vq, elem, len);
}

/* virtqueue_unpop:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to virtqueue_pop() will refetch the element.
 */
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
{
    vq->last_avail_idx--;
    virtqueue_detach_element(vq, elem, len);
}

/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: Number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    if (num > vq->inuse) {
        return false;
    }
    vq->last_avail_idx -= num;
    vq->inuse -= num;
    return true;
}

void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    if (unlikely(vq->vdev->broken)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(vq->vdev->broken)) {
        vq->inuse -= count;
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}
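
/*
 * Batching sketch (illustrative): a device that completes several elements at
 * once can fill each used slot relative to used_idx and publish them with a
 * single index update, instead of calling virtqueue_push() per element:
 *
 *     for (i = 0; i < n; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, n);
 *     virtio_notify(vdev, vq);
 *
 * virtqueue_push() is simply the n == 1 case of this pattern.
 */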

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                               unsigned int *head)
{
    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        virtio_error(vq->vdev, "Guest says index %u is available", *head);
        return false;
    }

    return true;
}

enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};

static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                    hwaddr desc_pa, unsigned int max,
                                    unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        virtio_error(vdev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_desc_read(vdev, desc, desc_pa, *next);
    return VIRTQUEUE_READ_DESC_MORE;
}

void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    int rc;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        VRingDesc desc;
        hwaddr desc_pa;
        unsigned int i;

        max = vq->vring.num;
        num_bufs = total_bufs;

        if (!virtqueue_get_head(vq, idx++, &i)) {
            goto err;
        }

        desc_pa = vq->vring.desc;
        vring_desc_read(vdev, &desc, desc_pa, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingDesc)) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = desc.len / sizeof(VRingDesc);
            desc_pa = desc.addr;
            num_bufs = i = 0;
            vring_desc_read(vdev, &desc, desc_pa, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }

    if (rc < 0) {
        goto err;
    }

done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}
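
/*
 * Illustrative check (struct req/resp are hypothetical): a device whose
 * requests have a driver-written header and a device-written reply can test
 * whether a complete pair fits before popping anything:
 *
 *     if (!virtqueue_avail_bytes(vq, sizeof(struct resp), sizeof(struct req))) {
 *         return;
 *     }
 *
 * The second argument counts device-writable ("in") bytes and the third
 * driver-written ("out") bytes, following the in/out convention of this file.
 */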

static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                               hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    bool ok = false;
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    if (!sz) {
        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
        goto out;
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            virtio_error(vdev, "virtio: too many write descriptors in "
                               "indirect table");
            goto out;
        }

        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
        if (!iov[num_sg].iov_base) {
            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
            goto out;
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    ok = true;

out:
    *p_num_sg = num_sg;
    return ok;
}

/* Only used by error code paths before we have a VirtQueueElement (therefore
 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
 * yet.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int i;

    for (i = 0; i < out_num + in_num; i++) {
        int is_write = i >= out_num;

        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
        iov++;
    }
}

static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
                                unsigned int *num_sg,
                                int is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < *num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

void virtqueue_map(VirtQueueElement *elem)
{
    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num, 1);
    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num, 0);
}

static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
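
/*
 * Layout note (descriptive of the offsets computed above): the element header
 * and its four variable-size arrays share a single allocation,
 *
 *     [VirtQueueElement][in_addr x in_num][out_addr x out_num]
 *         [in_sg x in_num][out_sg x out_num]
 *
 * with the addr and sg regions aligned via QEMU_ALIGN_UP, so one
 * g_free(elem) releases everything, including what virtqueue_pop() copies in.
 */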

void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem;
    unsigned out_num, in_num;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    if (unlikely(vdev->broken)) {
        return NULL;
    }
    if (virtio_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are no input or output buffers. */
    out_num = in_num = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        return NULL;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;
    vring_desc_read(vdev, &desc, desc_pa, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (desc.len % sizeof(VRingDesc)) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            return NULL;
        }

        /* loop over the indirect descriptor table */
        max = desc.len / sizeof(VRingDesc);
        desc_pa = desc.addr;
        i = 0;
        vring_desc_read(vdev, &desc, desc_pa, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    return NULL;
}
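
/*
 * Usage sketch (illustrative; my_handle_output and reply_len are placeholder
 * names, and error handling is elided): the canonical consumer of
 * virtqueue_pop() is a queue handler that drains the ring, processes each
 * element, and returns it to the guest:
 *
 *     static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         VirtQueueElement *elem;
 *
 *         while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *             ... read request from elem->out_sg, write reply to elem->in_sg ...
 *             virtqueue_push(vq, elem, reply_len);
 *             g_free(elem);
 *         }
 *         virtio_notify(vdev, vq);
 *     }
 */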

/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;

void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * When we do, we might be able to re-enable NDEBUG below.
     */
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    virtqueue_map(elem);
    return elem;
}

void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map when loading.  Do not
         * save it, as it would leak the QEMU address space layout.  */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Do not save iov_base as above.  */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }
    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (unlikely(vdev->broken)) {
        return;
    }

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (k->validate_features) {
        return k->validate_features(vdev);
    } else {
        return 0;
    }
}

int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }
    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
    return 0;
}

bool target_words_bigendian(void);
static enum virtio_device_endian virtio_default_endian(void)
{
    if (target_words_bigendian()) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cc = CPU_GET_CLASS(current_cpu);

    if (cc->virtio_is_big_endian(current_cpu)) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->broken = false;
    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    atomic_set(&vdev->isr, 0);
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].shadow_avail_idx = 0;
        vdev->vq[i].used_idx = 0;
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
        vdev->vq[i].inuse = 0;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_le_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_le_p(vdev->config + addr);
    return val;
}

void virtio_config_modern_writeb(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writew(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writel(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    vdev->vq[n].vring.desc = desc;
    vdev->vq[n].vring.avail = avail;
    vdev->vq[n].vring.used = used;
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
}

VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_get_num_queues(VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (!virtio_queue_get_num(vdev, i)) {
            break;
        }
    }

    return i;
}

void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }
    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtio_queue_update_rings(vdev, n);
}

static void virtio_queue_notify_aio_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_aio_output) {
        VirtIODevice *vdev = vq->vdev;

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_aio_output(vdev, vq);
    }
}

static void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        if (unlikely(vdev->broken)) {
            return;
        }

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    if (unlikely(!vq->vring.desc || vdev->broken)) {
        return;
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    if (vq->handle_aio_output) {
        event_notifier_set(&vq->host_notifier);
    } else if (vq->handle_output) {
        vq->handle_output(vdev, vq);
    }
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n < VIRTIO_QUEUE_MAX) {
        if (vdev->vector_queues &&
            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
            QLIST_REMOVE(vq, node);
        }
        vdev->vq[n].vector = vector;
        if (vdev->vector_queues &&
            vector != VIRTIO_NO_VECTOR) {
            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
        }
    }
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
        abort();
    }

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].handle_aio_output = NULL;

    return &vdev->vq[i];
}
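
/*
 * Realize-time usage (sketch; my_device_realize, s and my_handle_output are
 * placeholders): a device typically creates its queues once at realize time
 * and keeps the returned pointers:
 *
 *     static void my_device_realize(DeviceState *dev, Error **errp)
 *     {
 *         VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 *         ...
 *         s->req_vq = virtio_add_queue(vdev, 128, my_handle_output);
 *     }
 *
 * The handler has the VirtIOHandleOutput signature,
 * void (*)(VirtIODevice *, VirtQueue *), and is invoked when the guest kicks
 * the queue (see virtio_queue_notify_vq() above).
 */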

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
    vdev->vq[n].vring.num_default = 0;
}

static void virtio_set_isr(VirtIODevice *vdev, int value)
{
    uint8_t old = atomic_read(&vdev->isr);

    /* Do not write ISR if it does not change, so that its cacheline remains
     * shared in the common case where the guest does not read it.
     */
    if ((old & value) != value) {
        atomic_or(&vdev->isr, value);
    }
}

bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY was negotiated) */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
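
/*
 * Event-idx example (illustrative numbers): with VIRTIO_RING_F_EVENT_IDX the
 * guest publishes a used_event index and the standard helper
 *
 *     vring_need_event(event_idx, new, old)
 *         == (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old)
 *
 * decides whether to signal.  If the device moved used_idx from old = 10 to
 * new = 12 and the guest asked to be told at used_event = 11, then
 * 12 - 11 - 1 = 0 < 2 and the guest is notified; with used_event = 13 the
 * left side wraps to 65534 and the interrupt is suppressed.
 */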

void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!virtio_should_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify_irqfd(vdev, vq);

    /*
     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
     * incorrectly polling this bit during crashdump and hibernation
     * in MSI mode, causing a hang if this bit is never updated.
     * Recent releases of Windows do not really shut down, but rather
     * log out and hibernate to make the next startup faster.  Hence,
     * this manifested as a more serious hang during shutdown with
     * MSI enabled.
     *
1380     * Next driver release from 2016 fixed this problem, so working around it
1381     * is not a must, but it's easy to do so let's do it here.
1382     *
1383     * Note: it's safe to update ISR from any thread as it was switched
1384     * to an atomic operation.
1385     */
1386    virtio_set_isr(vq->vdev, 0x1);
1387    event_notifier_set(&vq->guest_notifier);
1388}
1389
1390void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
1391{
1392    if (!virtio_should_notify(vdev, vq)) {
1393        return;
1394    }
1395
1396    trace_virtio_notify(vdev, vq);
1397    virtio_set_isr(vq->vdev, 0x1);
1398    virtio_notify_vector(vdev, vq->vector);
1399}
1400
1401void virtio_notify_config(VirtIODevice *vdev)
1402{
1403    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1404        return;
1405
1406    virtio_set_isr(vdev, 0x3);
1407    vdev->generation++;
1408    virtio_notify_vector(vdev, vdev->config_vector);
1409}
1410
1411static bool virtio_device_endian_needed(void *opaque)
1412{
1413    VirtIODevice *vdev = opaque;
1414
1415    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1416    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1417        return vdev->device_endian != virtio_default_endian();
1418    }
1419    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
1420    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1421}
1422
1423static bool virtio_64bit_features_needed(void *opaque)
1424{
1425    VirtIODevice *vdev = opaque;
1426
1427    return (vdev->host_features >> 32) != 0;
1428}
1429
1430static bool virtio_virtqueue_needed(void *opaque)
1431{
1432    VirtIODevice *vdev = opaque;
1433
1434    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
1435}
1436
1437static bool virtio_ringsize_needed(void *opaque)
1438{
1439    VirtIODevice *vdev = opaque;
1440    int i;
1441
1442    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1443        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
1444            return true;
1445        }
1446    }
1447    return false;
1448}
1449
1450static bool virtio_extra_state_needed(void *opaque)
1451{
1452    VirtIODevice *vdev = opaque;
1453    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1454    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1455
1456    return k->has_extra_state &&
1457        k->has_extra_state(qbus->parent);
1458}
1459
1460static bool virtio_broken_needed(void *opaque)
1461{
1462    VirtIODevice *vdev = opaque;
1463
1464    return vdev->broken;
1465}
1466
1467static const VMStateDescription vmstate_virtqueue = {
1468    .name = "virtqueue_state",
1469    .version_id = 1,
1470    .minimum_version_id = 1,
1471    .fields = (VMStateField[]) {
1472        VMSTATE_UINT64(vring.avail, struct VirtQueue),
1473        VMSTATE_UINT64(vring.used, struct VirtQueue),
1474        VMSTATE_END_OF_LIST()
1475    }
1476};
1477
1478static const VMStateDescription vmstate_virtio_virtqueues = {
1479    .name = "virtio/virtqueues",
1480    .version_id = 1,
1481    .minimum_version_id = 1,
1482    .needed = &virtio_virtqueue_needed,
1483    .fields = (VMStateField[]) {
1484        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1485                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
1486        VMSTATE_END_OF_LIST()
1487    }
1488};
1489
1490static const VMStateDescription vmstate_ringsize = {
1491    .name = "ringsize_state",
1492    .version_id = 1,
1493    .minimum_version_id = 1,
1494    .fields = (VMStateField[]) {
1495        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
1496        VMSTATE_END_OF_LIST()
1497    }
1498};
1499
1500static const VMStateDescription vmstate_virtio_ringsize = {
1501    .name = "virtio/ringsize",
1502    .version_id = 1,
1503    .minimum_version_id = 1,
1504    .needed = &virtio_ringsize_needed,
1505    .fields = (VMStateField[]) {
1506        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1507                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
1508        VMSTATE_END_OF_LIST()
1509    }
1510};
1511
1512static int get_extra_state(QEMUFile *f, void *pv, size_t size)
1513{
1514    VirtIODevice *vdev = pv;
1515    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1516    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1517
1518    if (!k->load_extra_state) {
1519        return -1;
1520    } else {
1521        return k->load_extra_state(qbus->parent, f);
1522    }
1523}
1524
1525static void put_extra_state(QEMUFile *f, void *pv, size_t size)
1526{
1527    VirtIODevice *vdev = pv;
1528    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1529    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1530
1531    k->save_extra_state(qbus->parent, f);
1532}
1533
1534static const VMStateInfo vmstate_info_extra_state = {
1535    .name = "virtqueue_extra_state",
1536    .get = get_extra_state,
1537    .put = put_extra_state,
1538};
1539
1540static const VMStateDescription vmstate_virtio_extra_state = {
1541    .name = "virtio/extra_state",
1542    .version_id = 1,
1543    .minimum_version_id = 1,
1544    .needed = &virtio_extra_state_needed,
1545    .fields = (VMStateField[]) {
1546        {
1547            .name         = "extra_state",
1548            .version_id   = 0,
1549            .field_exists = NULL,
1550            .size         = 0,
1551            .info         = &vmstate_info_extra_state,
1552            .flags        = VMS_SINGLE,
1553            .offset       = 0,
1554        },
1555        VMSTATE_END_OF_LIST()
1556    }
1557};
1558
1559static const VMStateDescription vmstate_virtio_device_endian = {
1560    .name = "virtio/device_endian",
1561    .version_id = 1,
1562    .minimum_version_id = 1,
1563    .needed = &virtio_device_endian_needed,
1564    .fields = (VMStateField[]) {
1565        VMSTATE_UINT8(device_endian, VirtIODevice),
1566        VMSTATE_END_OF_LIST()
1567    }
1568};
1569
1570static const VMStateDescription vmstate_virtio_64bit_features = {
1571    .name = "virtio/64bit_features",
1572    .version_id = 1,
1573    .minimum_version_id = 1,
1574    .needed = &virtio_64bit_features_needed,
1575    .fields = (VMStateField[]) {
1576        VMSTATE_UINT64(guest_features, VirtIODevice),
1577        VMSTATE_END_OF_LIST()
1578    }
1579};
1580
1581static const VMStateDescription vmstate_virtio_broken = {
1582    .name = "virtio/broken",
1583    .version_id = 1,
1584    .minimum_version_id = 1,
1585    .needed = &virtio_broken_needed,
1586    .fields = (VMStateField[]) {
1587        VMSTATE_BOOL(broken, VirtIODevice),
1588        VMSTATE_END_OF_LIST()
1589    }
1590};
1591
1592static const VMStateDescription vmstate_virtio = {
1593    .name = "virtio",
1594    .version_id = 1,
1595    .minimum_version_id = 1,
1596    .minimum_version_id_old = 1,
1597    .fields = (VMStateField[]) {
1598        VMSTATE_END_OF_LIST()
1599    },
1600    .subsections = (const VMStateDescription*[]) {
1601        &vmstate_virtio_device_endian,
1602        &vmstate_virtio_64bit_features,
1603        &vmstate_virtio_virtqueues,
1604        &vmstate_virtio_ringsize,
1605        &vmstate_virtio_broken,
1606        &vmstate_virtio_extra_state,
1607        NULL
1608    }
1609};
1610
1611void virtio_save(VirtIODevice *vdev, QEMUFile *f)
1612{
1613    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1614    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1615    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1616    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
1617    int i;
1618
1619    if (k->save_config) {
1620        k->save_config(qbus->parent, f);
1621    }
1622
1623    qemu_put_8s(f, &vdev->status);
1624    qemu_put_8s(f, &vdev->isr);
1625    qemu_put_be16s(f, &vdev->queue_sel);
1626    qemu_put_be32s(f, &guest_features_lo);
1627    qemu_put_be32(f, vdev->config_len);
1628    qemu_put_buffer(f, vdev->config, vdev->config_len);
1629
1630    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1631        if (vdev->vq[i].vring.num == 0)
1632            break;
1633    }
1634
1635    qemu_put_be32(f, i);
1636
1637    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1638        if (vdev->vq[i].vring.num == 0)
1639            break;
1640
1641        qemu_put_be32(f, vdev->vq[i].vring.num);
1642        if (k->has_variable_vring_alignment) {
1643            qemu_put_be32(f, vdev->vq[i].vring.align);
1644        }
1645        /* XXX virtio-1 devices */
1646        qemu_put_be64(f, vdev->vq[i].vring.desc);
1647        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
1648        if (k->save_queue) {
1649            k->save_queue(qbus->parent, i, f);
1650        }
1651    }
1652
1653    if (vdc->save != NULL) {
1654        vdc->save(vdev, f);
1655    }
1656
1657    if (vdc->vmsd) {
1658        vmstate_save_state(f, vdc->vmsd, vdev, NULL);
1659    }
1660
1661    /* Subsections */
1662    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
1663}
1664
1665/* A wrapper for use as a VMState .put function */
1666static void virtio_device_put(QEMUFile *f, void *opaque, size_t size)
1667{
1668    virtio_save(VIRTIO_DEVICE(opaque), f);
1669}
1670
1671/* A wrapper for use as a VMState .get function */
1672static int virtio_device_get(QEMUFile *f, void *opaque, size_t size)
1673{
1674    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
1675    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
1676
1677    return virtio_load(vdev, f, dc->vmsd->version_id);
1678}
1679
1680const VMStateInfo  virtio_vmstate_info = {
1681    .name = "virtio",
1682    .get = virtio_device_get,
1683    .put = virtio_device_put,
1684};
1685
1686static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
1687{
1688    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1689    bool bad = (val & ~(vdev->host_features)) != 0;
1690
1691    val &= vdev->host_features;
1692    if (k->set_features) {
1693        k->set_features(vdev, val);
1694    }
1695    vdev->guest_features = val;
1696    return bad ? -1 : 0;
1697}
1698
1699int virtio_set_features(VirtIODevice *vdev, uint64_t val)
1700{
1701   /*
1702     * The driver must not attempt to set features after feature negotiation
1703     * has finished.
1704     */
1705    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
1706        return -EINVAL;
1707    }
1708    return virtio_set_features_nocheck(vdev, val);
1709}
1710
1711int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1712{
1713    int i, ret;
1714    int32_t config_len;
1715    uint32_t num;
1716    uint32_t features;
1717    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1718    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1719    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1720
1721    /*
1722     * We poison the endianness to ensure it does not get used before
1723     * subsections have been loaded.
1724     */
1725    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1726
1727    if (k->load_config) {
1728        ret = k->load_config(qbus->parent, f);
1729        if (ret)
1730            return ret;
1731    }
1732
1733    qemu_get_8s(f, &vdev->status);
1734    qemu_get_8s(f, &vdev->isr);
1735    qemu_get_be16s(f, &vdev->queue_sel);
1736    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1737        return -1;
1738    }
1739    qemu_get_be32s(f, &features);
1740
1741    /*
1742     * Temporarily set guest_features low bits - needed by
1743     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
1744     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
1745     *
1746     * Note: devices should always test host features in future - don't create
1747     * new dependencies like this.
1748     */
1749    vdev->guest_features = features;
1750
1751    config_len = qemu_get_be32(f);
1752
1753    /*
1754     * There are cases where the incoming config can be bigger or smaller
1755     * than what we have; so load what we have space for, and skip
1756     * any excess that's in the stream.
1757     */
1758    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1759
1760    while (config_len > vdev->config_len) {
1761        qemu_get_byte(f);
1762        config_len--;
1763    }
1764
1765    num = qemu_get_be32(f);
1766
1767    if (num > VIRTIO_QUEUE_MAX) {
1768        error_report("Invalid number of virtqueues: 0x%x", num);
1769        return -1;
1770    }
1771
1772    for (i = 0; i < num; i++) {
1773        vdev->vq[i].vring.num = qemu_get_be32(f);
1774        if (k->has_variable_vring_alignment) {
1775            vdev->vq[i].vring.align = qemu_get_be32(f);
1776        }
1777        vdev->vq[i].vring.desc = qemu_get_be64(f);
1778        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1779        vdev->vq[i].signalled_used_valid = false;
1780        vdev->vq[i].notification = true;
1781
1782        if (vdev->vq[i].vring.desc) {
1783            /* XXX virtio-1 devices */
1784            virtio_queue_update_rings(vdev, i);
1785        } else if (vdev->vq[i].last_avail_idx) {
1786            error_report("VQ %d address 0x0 "
1787                         "inconsistent with Host index 0x%x",
1788                         i, vdev->vq[i].last_avail_idx);
1789                return -1;
1790        }
1791        if (k->load_queue) {
1792            ret = k->load_queue(qbus->parent, i, f);
1793            if (ret)
1794                return ret;
1795        }
1796    }
1797
1798    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1799
1800    if (vdc->load != NULL) {
1801        ret = vdc->load(vdev, f, version_id);
1802        if (ret) {
1803            return ret;
1804        }
1805    }
1806
1807    if (vdc->vmsd) {
1808        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
1809        if (ret) {
1810            return ret;
1811        }
1812    }
1813
1814    /* Subsections */
1815    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1816    if (ret) {
1817        return ret;
1818    }
1819
1820    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1821        vdev->device_endian = virtio_default_endian();
1822    }
1823
1824    if (virtio_64bit_features_needed(vdev)) {
1825        /*
1826         * Subsection load filled vdev->guest_features.  Run them
1827         * through virtio_set_features to sanity-check them against
1828         * host_features.
1829         */
1830        uint64_t features64 = vdev->guest_features;
1831        if (virtio_set_features_nocheck(vdev, features64) < 0) {
1832            error_report("Features 0x%" PRIx64 " unsupported. "
1833                         "Allowed features: 0x%" PRIx64,
1834                         features64, vdev->host_features);
1835            return -1;
1836        }
1837    } else {
1838        if (virtio_set_features_nocheck(vdev, features) < 0) {
1839            error_report("Features 0x%x unsupported. "
1840                         "Allowed features: 0x%" PRIx64,
1841                         features, vdev->host_features);
1842            return -1;
1843        }
1844    }
1845
1846    for (i = 0; i < num; i++) {
1847        if (vdev->vq[i].vring.desc) {
1848            uint16_t nheads;
1849            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1850            /* Check it isn't doing strange things with descriptor numbers. */
1851            if (nheads > vdev->vq[i].vring.num) {
1852                error_report("VQ %d size 0x%x Guest index 0x%x "
1853                             "inconsistent with Host index 0x%x: delta 0x%x",
1854                             i, vdev->vq[i].vring.num,
1855                             vring_avail_idx(&vdev->vq[i]),
1856                             vdev->vq[i].last_avail_idx, nheads);
1857                return -1;
1858            }
1859            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
1860            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
1861
1862            /*
1863             * Some devices migrate VirtQueueElements that have been popped
1864             * from the avail ring but not yet returned to the used ring.
1865             * Since max ring size < UINT16_MAX it's safe to use modulo
1866             * UINT16_MAX + 1 subtraction.
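                 * For example, last_avail_idx == 0x0003 (after wrapping) and
                 * used_idx == 0xfffe gives
                 * inuse == (uint16_t)(0x0003 - 0xfffe) == 5.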
1867             */
1868            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
1869                                vdev->vq[i].used_idx);
1870            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
1871                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
1872                             "used_idx 0x%x",
1873                             i, vdev->vq[i].vring.num,
1874                             vdev->vq[i].last_avail_idx,
1875                             vdev->vq[i].used_idx);
1876                return -1;
1877            }
1878        }
1879    }
1880
1881    return 0;
1882}
1883
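    /* Undo the allocations done by virtio_init(). */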
1884void virtio_cleanup(VirtIODevice *vdev)
1885{
1886    qemu_del_vm_change_state_handler(vdev->vmstate);
1887    g_free(vdev->config);
1888    g_free(vdev->vq);
1889    g_free(vdev->vector_queues);
1890}
1891
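    /*
     * Restore the device status before notifying the transport when the VM
     * starts running, and only after notifying it when the VM stops, so the
     * backend sees a consistent device in both directions.
     */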
1892static void virtio_vmstate_change(void *opaque, int running, RunState state)
1893{
1894    VirtIODevice *vdev = opaque;
1895    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1896    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1897    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1898    vdev->vm_running = running;
1899
1900    if (backend_run) {
1901        virtio_set_status(vdev, vdev->status);
1902    }
1903
1904    if (k->vmstate_change) {
1905        k->vmstate_change(qbus->parent, backend_run);
1906    }
1907
1908    if (!backend_run) {
1909        virtio_set_status(vdev, vdev->status);
1910    }
1911}
1912
1913void virtio_instance_init_common(Object *proxy_obj, void *data,
1914                                 size_t vdev_size, const char *vdev_name)
1915{
1916    DeviceState *vdev = data;
1917
1918    object_initialize(vdev, vdev_size, vdev_name);
1919    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1920    object_unref(OBJECT(vdev));
1921    qdev_alias_all_properties(vdev, proxy_obj);
1922}
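
    /*
     * Typical use, from a transport proxy's instance_init (a sketch: the
     * "foo" names are illustrative, not defined in this file):
     *
     *     static void virtio_foo_pci_instance_init(Object *obj)
     *     {
     *         VirtIOFooPCI *dev = VIRTIO_FOO_PCI(obj);
     *
     *         virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
     *                                     TYPE_VIRTIO_FOO);
     *     }
     */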
1923
1924void virtio_init(VirtIODevice *vdev, const char *name,
1925                 uint16_t device_id, size_t config_size)
1926{
1927    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1928    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1929    int i;
1930    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
1931
1932    if (nvectors) {
1933        vdev->vector_queues =
1934            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
1935    }
1936
1937    vdev->device_id = device_id;
1938    vdev->status = 0;
1939    atomic_set(&vdev->isr, 0);
1940    vdev->queue_sel = 0;
1941    vdev->config_vector = VIRTIO_NO_VECTOR;
1942    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
1943    vdev->vm_running = runstate_is_running();
1944    vdev->broken = false;
1945    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1946        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1947        vdev->vq[i].vdev = vdev;
1948        vdev->vq[i].queue_index = i;
1949    }
1950
1951    vdev->name = name;
1952    vdev->config_len = config_size;
1953    if (vdev->config_len) {
1954        vdev->config = g_malloc0(config_size);
1955    } else {
1956        vdev->config = NULL;
1957    }
1958    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1959                                                     vdev);
1960    vdev->device_endian = virtio_default_endian();
1961    vdev->use_guest_notifier_mask = true;
1962}
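
    /*
     * Sketch of a device realize function calling virtio_init(); the device
     * ID and config struct are illustrative:
     *
     *     static void virtio_foo_device_realize(DeviceState *dev, Error **errp)
     *     {
     *         VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     *
     *         virtio_init(vdev, "virtio-foo", VIRTIO_ID_FOO,
     *                     sizeof(struct virtio_foo_config));
     *     }
     */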
1963
1964hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1965{
1966    return vdev->vq[n].vring.desc;
1967}
1968
1969hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1970{
1971    return vdev->vq[n].vring.avail;
1972}
1973
1974hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1975{
1976    return vdev->vq[n].vring.used;
1977}
1978
1979hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1980{
1981    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1982}
1983
1984hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1985{
1986    return offsetof(VRingAvail, ring) +
1987        sizeof(uint16_t) * vdev->vq[n].vring.num;
1988}
1989
1990hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1991{
1992    return offsetof(VRingUsed, ring) +
1993        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1994}
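
    /*
     * Worked example: a 256-entry ring gives 256 * 16 = 4096 bytes of
     * descriptors, 4 + 2 * 256 = 516 bytes of avail ring and
     * 4 + 8 * 256 = 2052 bytes of used ring.
     */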
1995
1996uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1997{
1998    return vdev->vq[n].last_avail_idx;
1999}
2000
2001void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
2002{
2003    vdev->vq[n].last_avail_idx = idx;
2004    vdev->vq[n].shadow_avail_idx = idx;
2005}
2006
2007void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
2008{
2009    vdev->vq[n].signalled_used_valid = false;
2010}
2011
2012VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
2013{
2014    return vdev->vq + n;
2015}
2016
2017uint16_t virtio_get_queue_index(VirtQueue *vq)
2018{
2019    return vq->queue_index;
2020}
2021
2022static void virtio_queue_guest_notifier_read(EventNotifier *n)
2023{
2024    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
2025    if (event_notifier_test_and_clear(n)) {
2026        virtio_notify_vector(vq->vdev, vq->vector);
2027    }
2028}
2029
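    /*
     * With an irqfd the interrupt is injected directly by the kernel, so no
     * userspace handler is installed; otherwise each guest notifier event
     * is forwarded to virtio_notify_vector() by the read handler above.
     */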
2030void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
2031                                                bool with_irqfd)
2032{
2033    if (assign && !with_irqfd) {
2034        event_notifier_set_handler(&vq->guest_notifier, false,
2035                                   virtio_queue_guest_notifier_read);
2036    } else {
2037        event_notifier_set_handler(&vq->guest_notifier, false, NULL);
2038    }
2039    if (!assign) {
2040        /* Test and clear notifier before closing it,
2041         * in case the poll callback didn't have time to run. */
2042        virtio_queue_guest_notifier_read(&vq->guest_notifier);
2043    }
2044}
2045
2046EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
2047{
2048    return &vq->guest_notifier;
2049}
2050
2051static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
2052{
2053    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2054    if (event_notifier_test_and_clear(n)) {
2055        virtio_queue_notify_aio_vq(vq);
2056    }
2057}
2058
2059void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
2060                                                VirtIOHandleOutput handle_output)
2061{
2062    if (handle_output) {
2063        vq->handle_aio_output = handle_output;
2064        aio_set_event_notifier(ctx, &vq->host_notifier, true,
2065                               virtio_queue_host_notifier_aio_read);
2066    } else {
2067        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
2068        /* Test and clear notifier after disabling event,
2069         * in case the poll callback didn't have time to run. */
2070        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
2071        vq->handle_aio_output = NULL;
2072    }
2073}
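
    /*
     * Dataplane-style usage sketch (ctx would typically be an IOThread's
     * AioContext; the names are illustrative):
     *
     *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, handle_output);
     *     ...
     *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, NULL);
     */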
2074
2075void virtio_queue_host_notifier_read(EventNotifier *n)
2076{
2077    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2078    if (event_notifier_test_and_clear(n)) {
2079        virtio_queue_notify_vq(vq);
2080    }
2081}
2082
2083EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
2084{
2085    return &vq->host_notifier;
2086}
2087
2088void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
2089{
2090    g_free(vdev->bus_name);
2091    vdev->bus_name = g_strdup(bus_name);
2092}
2093
2094void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
2095{
2096    va_list ap;
2097
2098    va_start(ap, fmt);
2099    error_vreport(fmt, ap);
2100    va_end(ap);
2101
2102    vdev->broken = true;
2103
2104    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2105        virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
2106        virtio_notify_config(vdev);
2107    }
2108}
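
    /*
     * Example call site (a sketch; the message and names are illustrative):
     *
     *     virtio_error(vdev, "virtio-foo: descriptor chain exceeds ring "
     *                  "size %u", vq->vring.num);
     */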
2109
2110static void virtio_device_realize(DeviceState *dev, Error **errp)
2111{
2112    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2113    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2114    Error *err = NULL;
2115
2116    /* Devices should either use vmsd or the load/save methods */
2117    assert(!vdc->vmsd || !vdc->load);
2118
2119    if (vdc->realize != NULL) {
2120        vdc->realize(dev, &err);
2121        if (err != NULL) {
2122            error_propagate(errp, err);
2123            return;
2124        }
2125    }
2126
2127    virtio_bus_device_plugged(vdev, &err);
2128    if (err != NULL) {
2129        error_propagate(errp, err);
2130        return;
2131    }
2132}
2133
2134static void virtio_device_unrealize(DeviceState *dev, Error **errp)
2135{
2136    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2137    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2138    Error *err = NULL;
2139
2140    virtio_bus_device_unplugged(vdev);
2141
2142    if (vdc->unrealize != NULL) {
2143        vdc->unrealize(dev, &err);
2144        if (err != NULL) {
2145            error_propagate(errp, err);
2146            return;
2147        }
2148    }
2149
2150    g_free(vdev->bus_name);
2151    vdev->bus_name = NULL;
2152}
2153
2154static Property virtio_properties[] = {
2155    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
2156    DEFINE_PROP_END_OF_LIST(),
2157};
2158
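    /*
     * Wire up ioeventfd for every configured queue, then kick each queue so
     * that requests already sitting in the vring are processed; on failure,
     * roll back the queues assigned so far in reverse order.
     */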
2159static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
2160{
2161    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2162    int n, r, err;
2163
2164    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2165        VirtQueue *vq = &vdev->vq[n];
2166        if (!virtio_queue_get_num(vdev, n)) {
2167            continue;
2168        }
2169        r = virtio_bus_set_host_notifier(qbus, n, true);
2170        if (r < 0) {
2171            err = r;
2172            goto assign_error;
2173        }
2174        event_notifier_set_handler(&vq->host_notifier, true,
2175                                   virtio_queue_host_notifier_read);
2176    }
2177
2178    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2179        /* Kick right away to begin processing requests already in vring */
2180        VirtQueue *vq = &vdev->vq[n];
2181        if (!vq->vring.num) {
2182            continue;
2183        }
2184        event_notifier_set(&vq->host_notifier);
2185    }
2186    return 0;
2187
2188assign_error:
2189    while (--n >= 0) {
2190        VirtQueue *vq = &vdev->vq[n];
2191        if (!virtio_queue_get_num(vdev, n)) {
2192            continue;
2193        }
2194
2195        event_notifier_set_handler(&vq->host_notifier, true, NULL);
2196        r = virtio_bus_set_host_notifier(qbus, n, false);
2197        assert(r >= 0);
2198    }
2199    return err;
2200}
2201
2202int virtio_device_start_ioeventfd(VirtIODevice *vdev)
2203{
2204    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2205    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2206
2207    return virtio_bus_start_ioeventfd(vbus);
2208}
2209
2210static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
2211{
2212    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2213    int n, r;
2214
2215    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2216        VirtQueue *vq = &vdev->vq[n];
2217
2218        if (!virtio_queue_get_num(vdev, n)) {
2219            continue;
2220        }
2221        event_notifier_set_handler(&vq->host_notifier, true, NULL);
2222        r = virtio_bus_set_host_notifier(qbus, n, false);
2223        assert(r >= 0);
2224    }
2225}
2226
2227void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
2228{
2229    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2230    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2231
2232    virtio_bus_stop_ioeventfd(vbus);
2233}
2234
2235int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
2236{
2237    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2238    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2239
2240    return virtio_bus_grab_ioeventfd(vbus);
2241}
2242
2243void virtio_device_release_ioeventfd(VirtIODevice *vdev)
2244{
2245    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2246    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2247
2248    virtio_bus_release_ioeventfd(vbus);
2249}
2250
2251static void virtio_device_class_init(ObjectClass *klass, void *data)
2252{
2253    /* Set the default value here. */
2254    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2255    DeviceClass *dc = DEVICE_CLASS(klass);
2256
2257    dc->realize = virtio_device_realize;
2258    dc->unrealize = virtio_device_unrealize;
2259    dc->bus_type = TYPE_VIRTIO_BUS;
2260    dc->props = virtio_properties;
2261    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
2262    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
2263
2264    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
2265}
2266
2267bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
2268{
2269    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2270    VirtioBusState *vbus = VIRTIO_BUS(qbus);
2271
2272    return virtio_bus_ioeventfd_enabled(vbus);
2273}
2274
2275static const TypeInfo virtio_device_info = {
2276    .name = TYPE_VIRTIO_DEVICE,
2277    .parent = TYPE_DEVICE,
2278    .instance_size = sizeof(VirtIODevice),
2279    .class_init = virtio_device_class_init,
2280    .abstract = true,
2281    .class_size = sizeof(VirtioDeviceClass),
2282};
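
    /*
     * Concrete devices subclass TYPE_VIRTIO_DEVICE (a sketch; the "foo"
     * names are illustrative):
     *
     *     static const TypeInfo virtio_foo_info = {
     *         .name          = TYPE_VIRTIO_FOO,
     *         .parent        = TYPE_VIRTIO_DEVICE,
     *         .instance_size = sizeof(VirtIOFoo),
     *         .class_init    = virtio_foo_class_init,
     *     };
     */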
2283
2284static void virtio_register_types(void)
2285{
2286    type_register_static(&virtio_device_info);
2287}
2288
2289type_init(virtio_register_types)
2290