qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "qemu-common.h"
  17#include "cpu.h"
  18#include "trace.h"
  19#include "exec/address-spaces.h"
  20#include "qemu/error-report.h"
  21#include "hw/virtio/virtio.h"
  22#include "qemu/atomic.h"
  23#include "hw/virtio/virtio-bus.h"
  24#include "migration/migration.h"
  25#include "hw/virtio/virtio-access.h"
  26
  27/*
  28 * The alignment to use between consumer and producer parts of vring.
  29 * The x86 page size is the default, used by transports like PCI
  30 * which don't provide a means for the guest to tell the host the alignment.
  31 */
  32#define VIRTIO_PCI_VRING_ALIGN         4096
  33
  34typedef struct VRingDesc
  35{
  36    uint64_t addr;
  37    uint32_t len;
  38    uint16_t flags;
  39    uint16_t next;
  40} VRingDesc;
  41
  42typedef struct VRingAvail
  43{
  44    uint16_t flags;
  45    uint16_t idx;
  46    uint16_t ring[0];
  47} VRingAvail;
  48
  49typedef struct VRingUsedElem
  50{
  51    uint32_t id;
  52    uint32_t len;
  53} VRingUsedElem;
  54
  55typedef struct VRingUsed
  56{
  57    uint16_t flags;
  58    uint16_t idx;
  59    VRingUsedElem ring[0];
  60} VRingUsed;
  61
  62typedef struct VRing
  63{
  64    unsigned int num;
  65    unsigned int num_default;
  66    unsigned int align;
  67    hwaddr desc;
  68    hwaddr avail;
  69    hwaddr used;
  70} VRing;
  71
  72struct VirtQueue
  73{
  74    VRing vring;
  75
  76    /* Next head to pop */
  77    uint16_t last_avail_idx;
  78
  79    /* Last avail_idx read from VQ. */
  80    uint16_t shadow_avail_idx;
  81
  82    uint16_t used_idx;
  83
  84    /* Last used index value we have signalled on */
  85    uint16_t signalled_used;
  86
  87    /* Whether signalled_used is valid */
  88    bool signalled_used_valid;
  89
  90    /* Notification enabled? */
  91    bool notification;
  92
  93    uint16_t queue_index;
  94
  95    int inuse;
  96
  97    uint16_t vector;
  98    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
  99    void (*handle_aio_output)(VirtIODevice *vdev, VirtQueue *vq);
 100    VirtIODevice *vdev;
 101    EventNotifier guest_notifier;
 102    EventNotifier host_notifier;
 103    QLIST_ENTRY(VirtQueue) node;
 104};
 105
 106/* virt queue functions */
 107void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 108{
 109    VRing *vring = &vdev->vq[n].vring;
 110
 111    if (!vring->desc) {
 112        /* not yet setup -> nothing to do */
 113        return;
 114    }
 115    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 116    vring->used = vring_align(vring->avail +
 117                              offsetof(VRingAvail, ring[vring->num]),
 118                              vring->align);
 119}
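    /*
     * For illustration, with num = 256 and align = 4096 (sizeof(VRingDesc)
     * is 16 bytes):
     *
     *   avail = desc + 256 * 16                  = desc + 4096
     *   used  = align(avail + 4 + 2 * 256, 4096) = desc + 8192
     *
     * i.e. the used ring starts on the next align boundary after the avail
     * ring (4 bytes of flags/idx plus one uint16_t ring entry per slot).
     */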
 120
 121static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 122                            hwaddr desc_pa, int i)
 123{
 124    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
 125                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
 126    virtio_tswap64s(vdev, &desc->addr);
 127    virtio_tswap32s(vdev, &desc->len);
 128    virtio_tswap16s(vdev, &desc->flags);
 129    virtio_tswap16s(vdev, &desc->next);
 130}
 131
 132static inline uint16_t vring_avail_flags(VirtQueue *vq)
 133{
 134    hwaddr pa;
 135    pa = vq->vring.avail + offsetof(VRingAvail, flags);
 136    return virtio_lduw_phys(vq->vdev, pa);
 137}
 138
 139static inline uint16_t vring_avail_idx(VirtQueue *vq)
 140{
 141    hwaddr pa;
 142    pa = vq->vring.avail + offsetof(VRingAvail, idx);
 143    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
 144    return vq->shadow_avail_idx;
 145}
 146
 147static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 148{
 149    hwaddr pa;
 150    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
 151    return virtio_lduw_phys(vq->vdev, pa);
 152}
 153
 154static inline uint16_t vring_get_used_event(VirtQueue *vq)
 155{
 156    return vring_avail_ring(vq, vq->vring.num);
 157}
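    /*
     * With VIRTIO_RING_F_EVENT_IDX negotiated, the guest publishes its
     * "used event" index in the slot just past the avail ring entries,
     * i.e. avail->ring[num]. The mirror image, the "avail event" index
     * the device stores past used->ring[num], is written by
     * vring_set_avail_event() below.
     */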
 158
 159static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 160                                    int i)
 161{
 162    hwaddr pa;
 163    virtio_tswap32s(vq->vdev, &uelem->id);
 164    virtio_tswap32s(vq->vdev, &uelem->len);
 165    pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
 166    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
 167                       (void *)uelem, sizeof(VRingUsedElem));
 168}
 169
 170static uint16_t vring_used_idx(VirtQueue *vq)
 171{
 172    hwaddr pa;
 173    pa = vq->vring.used + offsetof(VRingUsed, idx);
 174    return virtio_lduw_phys(vq->vdev, pa);
 175}
 176
 177static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 178{
 179    hwaddr pa;
 180    pa = vq->vring.used + offsetof(VRingUsed, idx);
 181    virtio_stw_phys(vq->vdev, pa, val);
 182    vq->used_idx = val;
 183}
 184
 185static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 186{
 187    VirtIODevice *vdev = vq->vdev;
 188    hwaddr pa;
 189    pa = vq->vring.used + offsetof(VRingUsed, flags);
 190    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
 191}
 192
 193static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 194{
 195    VirtIODevice *vdev = vq->vdev;
 196    hwaddr pa;
 197    pa = vq->vring.used + offsetof(VRingUsed, flags);
 198    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
 199}
 200
 201static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 202{
 203    hwaddr pa;
 204    if (!vq->notification) {
 205        return;
 206    }
 207    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
 208    virtio_stw_phys(vq->vdev, pa, val);
 209}
 210
 211void virtio_queue_set_notification(VirtQueue *vq, int enable)
 212{
 213    vq->notification = enable;
 214    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 215        vring_set_avail_event(vq, vring_avail_idx(vq));
 216    } else if (enable) {
 217        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 218    } else {
 219        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 220    }
 221    if (enable) {
 222        /* Expose avail event/used flags before caller checks the avail idx. */
 223        smp_mb();
 224    }
 225}
 226
 227int virtio_queue_ready(VirtQueue *vq)
 228{
 229    return vq->vring.avail != 0;
 230}
 231
 232/* Fetch avail_idx from VQ memory only when we really need to know if
 233 * guest has added some buffers. */
 234int virtio_queue_empty(VirtQueue *vq)
 235{
 236    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 237        return 0;
 238    }
 239
 240    return vring_avail_idx(vq) == vq->last_avail_idx;
 241}
 242
 243static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
 244                               unsigned int len)
 245{
 246    unsigned int offset;
 247    int i;
 248
 249    offset = 0;
 250    for (i = 0; i < elem->in_num; i++) {
 251        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 252
 253        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
 254                                  elem->in_sg[i].iov_len,
 255                                  1, size);
 256
 257        offset += size;
 258    }
 259
 260    for (i = 0; i < elem->out_num; i++)
 261        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
 262                                  elem->out_sg[i].iov_len,
 263                                  0, elem->out_sg[i].iov_len);
 264}
 265
 266void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
 267                       unsigned int len)
 268{
 269    vq->last_avail_idx--;
 270    virtqueue_unmap_sg(vq, elem, len);
 271}
 272
 273void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 274                    unsigned int len, unsigned int idx)
 275{
 276    VRingUsedElem uelem;
 277
 278    trace_virtqueue_fill(vq, elem, len, idx);
 279
 280    virtqueue_unmap_sg(vq, elem, len);
 281
 282    idx = (idx + vq->used_idx) % vq->vring.num;
 283
 284    uelem.id = elem->index;
 285    uelem.len = len;
 286    vring_used_write(vq, &uelem, idx);
 287}
 288
 289void virtqueue_flush(VirtQueue *vq, unsigned int count)
 290{
 291    uint16_t old, new;
 292    /* Make sure buffer is written before we update index. */
 293    smp_wmb();
 294    trace_virtqueue_flush(vq, count);
 295    old = vq->used_idx;
 296    new = old + count;
 297    vring_used_idx_set(vq, new);
 298    vq->inuse -= count;
 299    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
 300        vq->signalled_used_valid = false;
 301}
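    /*
     * The final check invalidates the cached signalled_used when it falls
     * inside the window of entries just published, i.e. when
     * old < signalled_used <= new in 16-bit modular arithmetic. Example:
     * old = 10, count = 3, new = 13, signalled_used = 12 gives
     * (int16_t)(13 - 12) = 1 < (uint16_t)(13 - 10) = 3, so the value is
     * dropped and virtio_should_notify() will signal unconditionally.
     */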
 302
 303void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 304                    unsigned int len)
 305{
 306    virtqueue_fill(vq, elem, len, 0);
 307    virtqueue_flush(vq, 1);
 308}
 309
 310static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 311{
 312    uint16_t num_heads = vring_avail_idx(vq) - idx;
 313
 314    /* Check it isn't doing very strange things with descriptor numbers. */
 315    if (num_heads > vq->vring.num) {
 316        error_report("Guest moved used index from %u to %u",
 317                     idx, vq->shadow_avail_idx);
 318        exit(1);
 319    }
 320    /* On success, callers read a descriptor at vq->last_avail_idx.
 321     * Make sure descriptor read does not bypass avail index read. */
 322    if (num_heads) {
 323        smp_rmb();
 324    }
 325
 326    return num_heads;
 327}
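    /*
     * Both indices are free-running 16-bit counters that wrap modulo
     * 65536, so the subtraction above yields the number of pending heads
     * even across a wrap: avail idx 2 minus last_avail_idx 65534 is
     * (uint16_t)4.
     */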
 328
 329static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
 330{
 331    unsigned int head;
 332
 333    /* Grab the next descriptor number they're advertising, and increment
 334     * the index we've seen. */
 335    head = vring_avail_ring(vq, idx % vq->vring.num);
 336
 337    /* If their number is silly, that's a fatal mistake. */
 338    if (head >= vq->vring.num) {
 339        error_report("Guest says index %u is available", head);
 340        exit(1);
 341    }
 342
 343    return head;
 344}
 345
 346static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
 347                                         hwaddr desc_pa, unsigned int max)
 348{
 349    unsigned int next;
 350
 351    /* If this descriptor says it doesn't chain, we're done. */
 352    if (!(desc->flags & VRING_DESC_F_NEXT)) {
 353        return max;
 354    }
 355
 356    /* Check they're not leading us off end of descriptors. */
 357    next = desc->next;
 358    /* Make sure compiler knows to grab that: we don't want it changing! */
 359    smp_wmb();
 360
 361    if (next >= max) {
 362        error_report("Desc next is %u", next);
 363        exit(1);
 364    }
 365
 366    vring_desc_read(vdev, desc, desc_pa, next);
 367    return next;
 368}
 369
 370void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
 371                               unsigned int *out_bytes,
 372                               unsigned max_in_bytes, unsigned max_out_bytes)
 373{
 374    unsigned int idx;
 375    unsigned int total_bufs, in_total, out_total;
 376
 377    idx = vq->last_avail_idx;
 378
 379    total_bufs = in_total = out_total = 0;
 380    while (virtqueue_num_heads(vq, idx)) {
 381        VirtIODevice *vdev = vq->vdev;
 382        unsigned int max, num_bufs, indirect = 0;
 383        VRingDesc desc;
 384        hwaddr desc_pa;
 385        int i;
 386
 387        max = vq->vring.num;
 388        num_bufs = total_bufs;
 389        i = virtqueue_get_head(vq, idx++);
 390        desc_pa = vq->vring.desc;
 391        vring_desc_read(vdev, &desc, desc_pa, i);
 392
 393        if (desc.flags & VRING_DESC_F_INDIRECT) {
 394            if (desc.len % sizeof(VRingDesc)) {
 395                error_report("Invalid size for indirect buffer table");
 396                exit(1);
 397            }
 398
 399            /* If we've got too many, that implies a descriptor loop. */
 400            if (num_bufs >= max) {
 401                error_report("Looped descriptor");
 402                exit(1);
 403            }
 404
 405            /* loop over the indirect descriptor table */
 406            indirect = 1;
 407            max = desc.len / sizeof(VRingDesc);
 408            desc_pa = desc.addr;
 409            num_bufs = i = 0;
 410            vring_desc_read(vdev, &desc, desc_pa, i);
 411        }
 412
 413        do {
 414            /* If we've got too many, that implies a descriptor loop. */
 415            if (++num_bufs > max) {
 416                error_report("Looped descriptor");
 417                exit(1);
 418            }
 419
 420            if (desc.flags & VRING_DESC_F_WRITE) {
 421                in_total += desc.len;
 422            } else {
 423                out_total += desc.len;
 424            }
 425            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
 426                goto done;
 427            }
 428        } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
 429
 430        if (!indirect)
 431            total_bufs = num_bufs;
 432        else
 433            total_bufs++;
 434    }
 435done:
 436    if (in_bytes) {
 437        *in_bytes = in_total;
 438    }
 439    if (out_bytes) {
 440        *out_bytes = out_total;
 441    }
 442}
 443
 444int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
 445                          unsigned int out_bytes)
 446{
 447    unsigned int in_total, out_total;
 448
 449    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
 450    return in_bytes <= in_total && out_bytes <= out_total;
 451}
 452
 453static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
 454                               unsigned int max_num_sg, bool is_write,
 455                               hwaddr pa, size_t sz)
 456{
 457    unsigned num_sg = *p_num_sg;
 458    assert(num_sg <= max_num_sg);
 459
 460    while (sz) {
 461        hwaddr len = sz;
 462
 463        if (num_sg == max_num_sg) {
 464            error_report("virtio: too many write descriptors in indirect table");
 465            exit(1);
 466        }
 467
 468        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
 469        iov[num_sg].iov_len = len;
 470        addr[num_sg] = pa;
 471
 472        sz -= len;
 473        pa += len;
 474        num_sg++;
 475    }
 476    *p_num_sg = num_sg;
 477}
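    /*
     * cpu_physical_memory_map() may shorten 'len' when the requested range
     * crosses a memory region boundary; the loop above therefore keeps
     * mapping until all 'sz' bytes are covered, consuming one iovec entry
     * per contiguous chunk.
     */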
 478
 479static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
 480                                unsigned int *num_sg, unsigned int max_size,
 481                                int is_write)
 482{
 483    unsigned int i;
 484    hwaddr len;
 485
 486    /* Note: this function MUST validate input, some callers
 487     * are passing in num_sg values received over the network.
 488     */
 489    /* TODO: teach all callers that this can fail, and return failure instead
 490     * of asserting here.
 491     * When we do, we might be able to re-enable NDEBUG below.
 492     */
 493#ifdef NDEBUG
 494#error building with NDEBUG is not supported
 495#endif
 496    assert(*num_sg <= max_size);
 497
 498    for (i = 0; i < *num_sg; i++) {
 499        len = sg[i].iov_len;
 500        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
 501        if (!sg[i].iov_base) {
 502            error_report("virtio: error trying to map MMIO memory");
 503            exit(1);
 504        }
 505        if (len != sg[i].iov_len) {
 506            error_report("virtio: unexpected memory split");
 507            exit(1);
 508        }
 509    }
 510}
 511
 512void virtqueue_map(VirtQueueElement *elem)
 513{
 514    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
 515                        VIRTQUEUE_MAX_SIZE, 1);
 516    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
 517                        VIRTQUEUE_MAX_SIZE, 0);
 518}
 519
 520void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
 521{
 522    VirtQueueElement *elem;
 523    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
 524    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
 525    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
 526    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
 527    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
 528    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
 529
 530    assert(sz >= sizeof(VirtQueueElement));
 531    elem = g_malloc(out_sg_end);
 532    elem->out_num = out_num;
 533    elem->in_num = in_num;
 534    elem->in_addr = (void *)elem + in_addr_ofs;
 535    elem->out_addr = (void *)elem + out_addr_ofs;
 536    elem->in_sg = (void *)elem + in_sg_ofs;
 537    elem->out_sg = (void *)elem + out_sg_ofs;
 538    return elem;
 539}
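    /*
     * The element and its four arrays live in a single allocation, laid
     * out using the offsets computed above (each array aligned for its
     * element type):
     *
     *   | VirtQueueElement | in_addr[] | out_addr[] | in_sg[] | out_sg[] |
     *
     * so one g_free() of the returned pointer releases everything.
     */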
 540
 541void *virtqueue_pop(VirtQueue *vq, size_t sz)
 542{
 543    unsigned int i, head, max;
 544    hwaddr desc_pa = vq->vring.desc;
 545    VirtIODevice *vdev = vq->vdev;
 546    VirtQueueElement *elem;
 547    unsigned out_num, in_num;
 548    hwaddr addr[VIRTQUEUE_MAX_SIZE];
 549    struct iovec iov[VIRTQUEUE_MAX_SIZE];
 550    VRingDesc desc;
 551
 552    if (virtio_queue_empty(vq)) {
 553        return NULL;
 554    }
 555    /* Needed after virtio_queue_empty(), see comment in
 556     * virtqueue_num_heads(). */
 557    smp_rmb();
 558
 559    /* When we start there are no input or output descriptors. */
 560    out_num = in_num = 0;
 561
 562    max = vq->vring.num;
 563
 564    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
 565    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
 566        vring_set_avail_event(vq, vq->last_avail_idx);
 567    }
 568
 569    vring_desc_read(vdev, &desc, desc_pa, i);
 570    if (desc.flags & VRING_DESC_F_INDIRECT) {
 571        if (desc.len % sizeof(VRingDesc)) {
 572            error_report("Invalid size for indirect buffer table");
 573            exit(1);
 574        }
 575
 576        /* loop over the indirect descriptor table */
 577        max = desc.len / sizeof(VRingDesc);
 578        desc_pa = desc.addr;
 579        i = 0;
 580        vring_desc_read(vdev, &desc, desc_pa, i);
 581    }
 582
 583    /* Collect all the descriptors */
 584    do {
 585        if (desc.flags & VRING_DESC_F_WRITE) {
 586            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
 587                               VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
 588        } else {
 589            if (in_num) {
 590                error_report("Incorrect order for descriptors");
 591                exit(1);
 592            }
 593            virtqueue_map_desc(&out_num, addr, iov,
 594                               VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
 595        }
 596
 597        /* If we've got too many, that implies a descriptor loop. */
 598        if ((in_num + out_num) > max) {
 599            error_report("Looped descriptor");
 600            exit(1);
 601        }
 602    } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
 603
 604    /* Now copy what we have collected and mapped */
 605    elem = virtqueue_alloc_element(sz, out_num, in_num);
 606    elem->index = head;
 607    for (i = 0; i < out_num; i++) {
 608        elem->out_addr[i] = addr[i];
 609        elem->out_sg[i] = iov[i];
 610    }
 611    for (i = 0; i < in_num; i++) {
 612        elem->in_addr[i] = addr[out_num + i];
 613        elem->in_sg[i] = iov[out_num + i];
 614    }
 615
 616    vq->inuse++;
 617
 618    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
 619    return elem;
 620}
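    /*
     * A minimal usage sketch (names and the 'written' length are
     * illustrative, not part of this file): a device's handle_output
     * callback typically drains the queue as
     *
     *   VirtQueueElement *elem;
     *   while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
     *       ... consume elem->out_sg, fill elem->in_sg ...
     *       virtqueue_push(vq, elem, written);
     *       g_free(elem);
     *   }
     *   virtio_notify(vdev, vq);
     */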
 621
 622/* Reading and writing a structure directly to QEMUFile is *awful*, but
 623 * it is what QEMU has always done by mistake.  We can change it sooner
 624 * or later by bumping the version number of the affected vm states.
 625 * In the meantime, since the in-memory layout of VirtQueueElement
 626 * has changed, we need to marshal to and from the layout that was
 627 * used before the change.
 628 */
 629typedef struct VirtQueueElementOld {
 630    unsigned int index;
 631    unsigned int out_num;
 632    unsigned int in_num;
 633    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
 634    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
 635    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
 636    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
 637} VirtQueueElementOld;
 638
 639void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
 640{
 641    VirtQueueElement *elem;
 642    VirtQueueElementOld data;
 643    int i;
 644
 645    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
 646
 647    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
 648    elem->index = data.index;
 649
 650    for (i = 0; i < elem->in_num; i++) {
 651        elem->in_addr[i] = data.in_addr[i];
 652    }
 653
 654    for (i = 0; i < elem->out_num; i++) {
 655        elem->out_addr[i] = data.out_addr[i];
 656    }
 657
 658    for (i = 0; i < elem->in_num; i++) {
 659        /* Base is overwritten by virtqueue_map.  */
 660        elem->in_sg[i].iov_base = 0;
 661        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
 662    }
 663
 664    for (i = 0; i < elem->out_num; i++) {
 665        /* Base is overwritten by virtqueue_map.  */
 666        elem->out_sg[i].iov_base = 0;
 667        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
 668    }
 669
 670    virtqueue_map(elem);
 671    return elem;
 672}
 673
 674void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
 675{
 676    VirtQueueElementOld data;
 677    int i;
 678
 679    memset(&data, 0, sizeof(data));
 680    data.index = elem->index;
 681    data.in_num = elem->in_num;
 682    data.out_num = elem->out_num;
 683
 684    for (i = 0; i < elem->in_num; i++) {
 685        data.in_addr[i] = elem->in_addr[i];
 686    }
 687
 688    for (i = 0; i < elem->out_num; i++) {
 689        data.out_addr[i] = elem->out_addr[i];
 690    }
 691
 692    for (i = 0; i < elem->in_num; i++) {
 693        /* Base is overwritten by virtqueue_map when loading.  Do not
 694         * save it, as it would leak the QEMU address space layout.  */
 695        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
 696    }
 697
 698    for (i = 0; i < elem->out_num; i++) {
 699        /* Do not save iov_base as above.  */
 700        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
 701    }
 702    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
 703}
 704
 705/* virtio device */
 706static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
 707{
 708    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
 709    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 710
 711    if (k->notify) {
 712        k->notify(qbus->parent, vector);
 713    }
 714}
 715
 716void virtio_update_irq(VirtIODevice *vdev)
 717{
 718    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 719}
 720
 721static int virtio_validate_features(VirtIODevice *vdev)
 722{
 723    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 724
 725    if (k->validate_features) {
 726        return k->validate_features(vdev);
 727    } else {
 728        return 0;
 729    }
 730}
 731
 732int virtio_set_status(VirtIODevice *vdev, uint8_t val)
 733{
 734    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 735    trace_virtio_set_status(vdev, val);
 736
 737    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
 738        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
 739            val & VIRTIO_CONFIG_S_FEATURES_OK) {
 740            int ret = virtio_validate_features(vdev);
 741
 742            if (ret) {
 743                return ret;
 744            }
 745        }
 746    }
 747    if (k->set_status) {
 748        k->set_status(vdev, val);
 749    }
 750    vdev->status = val;
 751    return 0;
 752}
 753
 754bool target_words_bigendian(void);
 755static enum virtio_device_endian virtio_default_endian(void)
 756{
 757    if (target_words_bigendian()) {
 758        return VIRTIO_DEVICE_ENDIAN_BIG;
 759    } else {
 760        return VIRTIO_DEVICE_ENDIAN_LITTLE;
 761    }
 762}
 763
 764static enum virtio_device_endian virtio_current_cpu_endian(void)
 765{
 766    CPUClass *cc = CPU_GET_CLASS(current_cpu);
 767
 768    if (cc->virtio_is_big_endian(current_cpu)) {
 769        return VIRTIO_DEVICE_ENDIAN_BIG;
 770    } else {
 771        return VIRTIO_DEVICE_ENDIAN_LITTLE;
 772    }
 773}
 774
 775void virtio_reset(void *opaque)
 776{
 777    VirtIODevice *vdev = opaque;
 778    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 779    int i;
 780
 781    virtio_set_status(vdev, 0);
 782    if (current_cpu) {
 783        /* Guest initiated reset */
 784        vdev->device_endian = virtio_current_cpu_endian();
 785    } else {
 786        /* System reset */
 787        vdev->device_endian = virtio_default_endian();
 788    }
 789
 790    if (k->reset) {
 791        k->reset(vdev);
 792    }
 793
 794    vdev->guest_features = 0;
 795    vdev->queue_sel = 0;
 796    vdev->status = 0;
 797    vdev->isr = 0;
 798    vdev->config_vector = VIRTIO_NO_VECTOR;
 799    virtio_notify_vector(vdev, vdev->config_vector);
 800
 801    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
 802        vdev->vq[i].vring.desc = 0;
 803        vdev->vq[i].vring.avail = 0;
 804        vdev->vq[i].vring.used = 0;
 805        vdev->vq[i].last_avail_idx = 0;
 806        vdev->vq[i].shadow_avail_idx = 0;
 807        vdev->vq[i].used_idx = 0;
 808        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
 809        vdev->vq[i].signalled_used = 0;
 810        vdev->vq[i].signalled_used_valid = false;
 811        vdev->vq[i].notification = true;
 812        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
 813    }
 814}
 815
 816uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
 817{
 818    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 819    uint8_t val;
 820
 821    if (addr + sizeof(val) > vdev->config_len) {
 822        return (uint32_t)-1;
 823    }
 824
 825    k->get_config(vdev, vdev->config);
 826
 827    val = ldub_p(vdev->config + addr);
 828    return val;
 829}
 830
 831uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
 832{
 833    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 834    uint16_t val;
 835
 836    if (addr + sizeof(val) > vdev->config_len) {
 837        return (uint32_t)-1;
 838    }
 839
 840    k->get_config(vdev, vdev->config);
 841
 842    val = lduw_p(vdev->config + addr);
 843    return val;
 844}
 845
 846uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
 847{
 848    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 849    uint32_t val;
 850
 851    if (addr + sizeof(val) > vdev->config_len) {
 852        return (uint32_t)-1;
 853    }
 854
 855    k->get_config(vdev, vdev->config);
 856
 857    val = ldl_p(vdev->config + addr);
 858    return val;
 859}
 860
 861void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 862{
 863    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 864    uint8_t val = data;
 865
 866    if (addr + sizeof(val) > vdev->config_len) {
 867        return;
 868    }
 869
 870    stb_p(vdev->config + addr, val);
 871
 872    if (k->set_config) {
 873        k->set_config(vdev, vdev->config);
 874    }
 875}
 876
 877void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 878{
 879    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 880    uint16_t val = data;
 881
 882    if (addr + sizeof(val) > vdev->config_len) {
 883        return;
 884    }
 885
 886    stw_p(vdev->config + addr, val);
 887
 888    if (k->set_config) {
 889        k->set_config(vdev, vdev->config);
 890    }
 891}
 892
 893void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 894{
 895    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 896    uint32_t val = data;
 897
 898    if (addr + sizeof(val) > vdev->config_len) {
 899        return;
 900    }
 901
 902    stl_p(vdev->config + addr, val);
 903
 904    if (k->set_config) {
 905        k->set_config(vdev, vdev->config);
 906    }
 907}
 908
 909uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
 910{
 911    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 912    uint8_t val;
 913
 914    if (addr + sizeof(val) > vdev->config_len) {
 915        return (uint32_t)-1;
 916    }
 917
 918    k->get_config(vdev, vdev->config);
 919
 920    val = ldub_p(vdev->config + addr);
 921    return val;
 922}
 923
 924uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
 925{
 926    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 927    uint16_t val;
 928
 929    if (addr + sizeof(val) > vdev->config_len) {
 930        return (uint32_t)-1;
 931    }
 932
 933    k->get_config(vdev, vdev->config);
 934
 935    val = lduw_le_p(vdev->config + addr);
 936    return val;
 937}
 938
 939uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
 940{
 941    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 942    uint32_t val;
 943
 944    if (addr + sizeof(val) > vdev->config_len) {
 945        return (uint32_t)-1;
 946    }
 947
 948    k->get_config(vdev, vdev->config);
 949
 950    val = ldl_le_p(vdev->config + addr);
 951    return val;
 952}
 953
 954void virtio_config_modern_writeb(VirtIODevice *vdev,
 955                                 uint32_t addr, uint32_t data)
 956{
 957    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 958    uint8_t val = data;
 959
 960    if (addr + sizeof(val) > vdev->config_len) {
 961        return;
 962    }
 963
 964    stb_p(vdev->config + addr, val);
 965
 966    if (k->set_config) {
 967        k->set_config(vdev, vdev->config);
 968    }
 969}
 970
 971void virtio_config_modern_writew(VirtIODevice *vdev,
 972                                 uint32_t addr, uint32_t data)
 973{
 974    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 975    uint16_t val = data;
 976
 977    if (addr + sizeof(val) > vdev->config_len) {
 978        return;
 979    }
 980
 981    stw_le_p(vdev->config + addr, val);
 982
 983    if (k->set_config) {
 984        k->set_config(vdev, vdev->config);
 985    }
 986}
 987
 988void virtio_config_modern_writel(VirtIODevice *vdev,
 989                                 uint32_t addr, uint32_t data)
 990{
 991    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 992    uint32_t val = data;
 993
 994    if (addr + sizeof(val) > vdev->config_len) {
 995        return;
 996    }
 997
 998    stl_le_p(vdev->config + addr, val);
 999
1000    if (k->set_config) {
1001        k->set_config(vdev, vdev->config);
1002    }
1003}
1004
1005void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
1006{
1007    vdev->vq[n].vring.desc = addr;
1008    virtio_queue_update_rings(vdev, n);
1009}
1010
1011hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
1012{
1013    return vdev->vq[n].vring.desc;
1014}
1015
1016void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
1017                            hwaddr avail, hwaddr used)
1018{
1019    vdev->vq[n].vring.desc = desc;
1020    vdev->vq[n].vring.avail = avail;
1021    vdev->vq[n].vring.used = used;
1022}
1023
1024void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
1025{
1026    /* Don't allow guest to flip queue between existent and
1027     * nonexistent states, or to set it to an invalid size.
1028     */
1029    if (!!num != !!vdev->vq[n].vring.num ||
1030        num > VIRTQUEUE_MAX_SIZE ||
1031        num < 0) {
1032        return;
1033    }
1034    vdev->vq[n].vring.num = num;
1035}
1036
1037VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
1038{
1039    return QLIST_FIRST(&vdev->vector_queues[vector]);
1040}
1041
1042VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
1043{
1044    return QLIST_NEXT(vq, node);
1045}
1046
1047int virtio_queue_get_num(VirtIODevice *vdev, int n)
1048{
1049    return vdev->vq[n].vring.num;
1050}
1051
1052int virtio_get_num_queues(VirtIODevice *vdev)
1053{
1054    int i;
1055
1056    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1057        if (!virtio_queue_get_num(vdev, i)) {
1058            break;
1059        }
1060    }
1061
1062    return i;
1063}
1064
1065int virtio_queue_get_id(VirtQueue *vq)
1066{
1067    VirtIODevice *vdev = vq->vdev;
1068    assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_QUEUE_MAX]);
1069    return vq - &vdev->vq[0];
1070}
1071
1072void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
1073{
1074    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1075    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1076
1077    /* virtio-1 compliant devices cannot change the alignment */
1078    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1079        error_report("tried to modify queue alignment for virtio-1 device");
1080        return;
1081    }
1082    /* Check that the transport told us it was going to do this
1083     * (so a buggy transport will immediately assert rather than
1084     * silently failing to migrate this state)
1085     */
1086    assert(k->has_variable_vring_alignment);
1087
1088    vdev->vq[n].vring.align = align;
1089    virtio_queue_update_rings(vdev, n);
1090}
1091
1092static void virtio_queue_notify_aio_vq(VirtQueue *vq)
1093{
1094    if (vq->vring.desc && vq->handle_aio_output) {
1095        VirtIODevice *vdev = vq->vdev;
1096
1097        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1098        vq->handle_aio_output(vdev, vq);
1099    }
1100}
1101
1102static void virtio_queue_notify_vq(VirtQueue *vq)
1103{
1104    if (vq->vring.desc && vq->handle_output) {
1105        VirtIODevice *vdev = vq->vdev;
1106
1107        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1108        vq->handle_output(vdev, vq);
1109    }
1110}
1111
1112void virtio_queue_notify(VirtIODevice *vdev, int n)
1113{
1114    virtio_queue_notify_vq(&vdev->vq[n]);
1115}
1116
1117uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
1118{
1119    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
1120        VIRTIO_NO_VECTOR;
1121}
1122
1123void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
1124{
1125    VirtQueue *vq = &vdev->vq[n];
1126
1127    if (n < VIRTIO_QUEUE_MAX) {
1128        if (vdev->vector_queues &&
1129            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
1130            QLIST_REMOVE(vq, node);
1131        }
1132        vdev->vq[n].vector = vector;
1133        if (vdev->vector_queues &&
1134            vector != VIRTIO_NO_VECTOR) {
1135            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
1136        }
1137    }
1138}
1139
1140VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
1141                            void (*handle_output)(VirtIODevice *, VirtQueue *))
1142{
1143    int i;
1144
1145    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1146        if (vdev->vq[i].vring.num == 0)
1147            break;
1148    }
1149
1150    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
1151        abort();
1152
1153    vdev->vq[i].vring.num = queue_size;
1154    vdev->vq[i].vring.num_default = queue_size;
1155    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
1156    vdev->vq[i].handle_output = handle_output;
1157    vdev->vq[i].handle_aio_output = NULL;
1158
1159    return &vdev->vq[i];
1160}
1161
1162void virtio_del_queue(VirtIODevice *vdev, int n)
1163{
1164    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
1165        abort();
1166    }
1167
1168    vdev->vq[n].vring.num = 0;
1169    vdev->vq[n].vring.num_default = 0;
1170}
1171
1172void virtio_irq(VirtQueue *vq)
1173{
1174    trace_virtio_irq(vq);
1175    vq->vdev->isr |= 0x01;
1176    virtio_notify_vector(vq->vdev, vq->vector);
1177}
1178
1179bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
1180{
1181    uint16_t old, new;
1182    bool v;
1183    /* We need to expose used array entries before checking used event. */
1184    smp_mb();
1185    /* Always notify when queue is empty, if the feature was acknowledged */
1186    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1187        !vq->inuse && virtio_queue_empty(vq)) {
1188        return true;
1189    }
1190
1191    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1192        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
1193    }
1194
1195    v = vq->signalled_used_valid;
1196    vq->signalled_used_valid = true;
1197    old = vq->signalled_used;
1198    new = vq->signalled_used = vq->used_idx;
1199    return !v || vring_need_event(vring_get_used_event(vq), new, old);
1200}
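    /*
     * vring_need_event(), from the standard virtio ring header, expands to
     *
     *   (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old)
     *
     * i.e. notify only if the guest's used_event index was reached or
     * passed while moving from old to new. Example: old = 5, new = 8,
     * used_event = 6 gives (8 - 6 - 1) = 1 < (8 - 5) = 3, so an
     * interrupt is raised.
     */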
1201
1202void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
1203{
1204    if (!virtio_should_notify(vdev, vq)) {
1205        return;
1206    }
1207
1208    trace_virtio_notify(vdev, vq);
1209    vdev->isr |= 0x01;
1210    virtio_notify_vector(vdev, vq->vector);
1211}
1212
1213void virtio_notify_config(VirtIODevice *vdev)
1214{
1215    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1216        return;
1217
1218    vdev->isr |= 0x03;
1219    vdev->generation++;
1220    virtio_notify_vector(vdev, vdev->config_vector);
1221}
1222
1223static bool virtio_device_endian_needed(void *opaque)
1224{
1225    VirtIODevice *vdev = opaque;
1226
1227    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1228    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1229        return vdev->device_endian != virtio_default_endian();
1230    }
1231    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
1232    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1233}
1234
1235static bool virtio_64bit_features_needed(void *opaque)
1236{
1237    VirtIODevice *vdev = opaque;
1238
1239    return (vdev->host_features >> 32) != 0;
1240}
1241
1242static bool virtio_virtqueue_needed(void *opaque)
1243{
1244    VirtIODevice *vdev = opaque;
1245
1246    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
1247}
1248
1249static bool virtio_ringsize_needed(void *opaque)
1250{
1251    VirtIODevice *vdev = opaque;
1252    int i;
1253
1254    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1255        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
1256            return true;
1257        }
1258    }
1259    return false;
1260}
1261
1262static bool virtio_extra_state_needed(void *opaque)
1263{
1264    VirtIODevice *vdev = opaque;
1265    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1266    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1267
1268    return k->has_extra_state &&
1269        k->has_extra_state(qbus->parent);
1270}
1271
1272static const VMStateDescription vmstate_virtqueue = {
1273    .name = "virtqueue_state",
1274    .version_id = 1,
1275    .minimum_version_id = 1,
1276    .fields = (VMStateField[]) {
1277        VMSTATE_UINT64(vring.avail, struct VirtQueue),
1278        VMSTATE_UINT64(vring.used, struct VirtQueue),
1279        VMSTATE_END_OF_LIST()
1280    }
1281};
1282
1283static const VMStateDescription vmstate_virtio_virtqueues = {
1284    .name = "virtio/virtqueues",
1285    .version_id = 1,
1286    .minimum_version_id = 1,
1287    .needed = &virtio_virtqueue_needed,
1288    .fields = (VMStateField[]) {
1289        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1290                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
1291        VMSTATE_END_OF_LIST()
1292    }
1293};
1294
1295static const VMStateDescription vmstate_ringsize = {
1296    .name = "ringsize_state",
1297    .version_id = 1,
1298    .minimum_version_id = 1,
1299    .fields = (VMStateField[]) {
1300        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
1301        VMSTATE_END_OF_LIST()
1302    }
1303};
1304
1305static const VMStateDescription vmstate_virtio_ringsize = {
1306    .name = "virtio/ringsize",
1307    .version_id = 1,
1308    .minimum_version_id = 1,
1309    .needed = &virtio_ringsize_needed,
1310    .fields = (VMStateField[]) {
1311        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1312                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
1313        VMSTATE_END_OF_LIST()
1314    }
1315};
1316
1317static int get_extra_state(QEMUFile *f, void *pv, size_t size)
1318{
1319    VirtIODevice *vdev = pv;
1320    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1321    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1322
1323    if (!k->load_extra_state) {
1324        return -1;
1325    } else {
1326        return k->load_extra_state(qbus->parent, f);
1327    }
1328}
1329
1330static void put_extra_state(QEMUFile *f, void *pv, size_t size)
1331{
1332    VirtIODevice *vdev = pv;
1333    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1334    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1335
1336    k->save_extra_state(qbus->parent, f);
1337}
1338
1339static const VMStateInfo vmstate_info_extra_state = {
1340    .name = "virtqueue_extra_state",
1341    .get = get_extra_state,
1342    .put = put_extra_state,
1343};
1344
1345static const VMStateDescription vmstate_virtio_extra_state = {
1346    .name = "virtio/extra_state",
1347    .version_id = 1,
1348    .minimum_version_id = 1,
1349    .needed = &virtio_extra_state_needed,
1350    .fields = (VMStateField[]) {
1351        {
1352            .name         = "extra_state",
1353            .version_id   = 0,
1354            .field_exists = NULL,
1355            .size         = 0,
1356            .info         = &vmstate_info_extra_state,
1357            .flags        = VMS_SINGLE,
1358            .offset       = 0,
1359        },
1360        VMSTATE_END_OF_LIST()
1361    }
1362};
1363
1364static const VMStateDescription vmstate_virtio_device_endian = {
1365    .name = "virtio/device_endian",
1366    .version_id = 1,
1367    .minimum_version_id = 1,
1368    .needed = &virtio_device_endian_needed,
1369    .fields = (VMStateField[]) {
1370        VMSTATE_UINT8(device_endian, VirtIODevice),
1371        VMSTATE_END_OF_LIST()
1372    }
1373};
1374
1375static const VMStateDescription vmstate_virtio_64bit_features = {
1376    .name = "virtio/64bit_features",
1377    .version_id = 1,
1378    .minimum_version_id = 1,
1379    .needed = &virtio_64bit_features_needed,
1380    .fields = (VMStateField[]) {
1381        VMSTATE_UINT64(guest_features, VirtIODevice),
1382        VMSTATE_END_OF_LIST()
1383    }
1384};
1385
1386static const VMStateDescription vmstate_virtio = {
1387    .name = "virtio",
1388    .version_id = 1,
1389    .minimum_version_id = 1,
1390    .minimum_version_id_old = 1,
1391    .fields = (VMStateField[]) {
1392        VMSTATE_END_OF_LIST()
1393    },
1394    .subsections = (const VMStateDescription*[]) {
1395        &vmstate_virtio_device_endian,
1396        &vmstate_virtio_64bit_features,
1397        &vmstate_virtio_virtqueues,
1398        &vmstate_virtio_ringsize,
1399        &vmstate_virtio_extra_state,
1400        NULL
1401    }
1402};
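    /*
     * Each subsection above is only emitted when its .needed callback
     * returns true, so the migration stream stays readable by older QEMU
     * versions whenever the corresponding state is at its default.
     */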
1403
1404void virtio_save(VirtIODevice *vdev, QEMUFile *f)
1405{
1406    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1407    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1408    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1409    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
1410    int i;
1411
1412    if (k->save_config) {
1413        k->save_config(qbus->parent, f);
1414    }
1415
1416    qemu_put_8s(f, &vdev->status);
1417    qemu_put_8s(f, &vdev->isr);
1418    qemu_put_be16s(f, &vdev->queue_sel);
1419    qemu_put_be32s(f, &guest_features_lo);
1420    qemu_put_be32(f, vdev->config_len);
1421    qemu_put_buffer(f, vdev->config, vdev->config_len);
1422
1423    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1424        if (vdev->vq[i].vring.num == 0)
1425            break;
1426    }
1427
1428    qemu_put_be32(f, i);
1429
1430    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1431        if (vdev->vq[i].vring.num == 0)
1432            break;
1433
1434        qemu_put_be32(f, vdev->vq[i].vring.num);
1435        if (k->has_variable_vring_alignment) {
1436            qemu_put_be32(f, vdev->vq[i].vring.align);
1437        }
1438        /* XXX virtio-1 devices */
1439        qemu_put_be64(f, vdev->vq[i].vring.desc);
1440        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
1441        if (k->save_queue) {
1442            k->save_queue(qbus->parent, i, f);
1443        }
1444    }
1445
1446    if (vdc->save != NULL) {
1447        vdc->save(vdev, f);
1448    }
1449
1450    /* Subsections */
1451    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
1452}
1453
1454static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
1455{
1456    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1457    bool bad = (val & ~(vdev->host_features)) != 0;
1458
1459    val &= vdev->host_features;
1460    if (k->set_features) {
1461        k->set_features(vdev, val);
1462    }
1463    vdev->guest_features = val;
1464    return bad ? -1 : 0;
1465}
1466
1467int virtio_set_features(VirtIODevice *vdev, uint64_t val)
1468{
1469    /*
1470     * The driver must not attempt to set features after feature negotiation
1471     * has finished.
1472     */
1473    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
1474        return -EINVAL;
1475    }
1476    return virtio_set_features_nocheck(vdev, val);
1477}
1478
1479int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1480{
1481    int i, ret;
1482    int32_t config_len;
1483    uint32_t num;
1484    uint32_t features;
1485    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1486    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1487    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1488
1489    /*
1490     * We poison the endianness to ensure it does not get used before
1491     * subsections have been loaded.
1492     */
1493    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1494
1495    if (k->load_config) {
1496        ret = k->load_config(qbus->parent, f);
1497        if (ret)
1498            return ret;
1499    }
1500
1501    qemu_get_8s(f, &vdev->status);
1502    qemu_get_8s(f, &vdev->isr);
1503    qemu_get_be16s(f, &vdev->queue_sel);
1504    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1505        return -1;
1506    }
1507    qemu_get_be32s(f, &features);
1508
1509    config_len = qemu_get_be32(f);
1510
1511    /*
1512     * There are cases where the incoming config can be bigger or smaller
1513     * than what we have; so load what we have space for, and skip
1514     * any excess that's in the stream.
1515     */
1516    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1517
1518    while (config_len > vdev->config_len) {
1519        qemu_get_byte(f);
1520        config_len--;
1521    }
1522
1523    num = qemu_get_be32(f);
1524
1525    if (num > VIRTIO_QUEUE_MAX) {
1526        error_report("Invalid number of virtqueues: 0x%x", num);
1527        return -1;
1528    }
1529
1530    for (i = 0; i < num; i++) {
1531        vdev->vq[i].vring.num = qemu_get_be32(f);
1532        if (k->has_variable_vring_alignment) {
1533            vdev->vq[i].vring.align = qemu_get_be32(f);
1534        }
1535        vdev->vq[i].vring.desc = qemu_get_be64(f);
1536        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1537        vdev->vq[i].signalled_used_valid = false;
1538        vdev->vq[i].notification = true;
1539
1540        if (vdev->vq[i].vring.desc) {
1541            /* XXX virtio-1 devices */
1542            virtio_queue_update_rings(vdev, i);
1543        } else if (vdev->vq[i].last_avail_idx) {
1544            error_report("VQ %d address 0x0 "
1545                         "inconsistent with Host index 0x%x",
1546                         i, vdev->vq[i].last_avail_idx);
1547            return -1;
1548        }
1549        if (k->load_queue) {
1550            ret = k->load_queue(qbus->parent, i, f);
1551            if (ret)
1552                return ret;
1553        }
1554    }
1555
1556    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1557
1558    if (vdc->load != NULL) {
1559        ret = vdc->load(vdev, f, version_id);
1560        if (ret) {
1561            return ret;
1562        }
1563    }
1564
1565    /* Subsections */
1566    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1567    if (ret) {
1568        return ret;
1569    }
1570
1571    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1572        vdev->device_endian = virtio_default_endian();
1573    }
1574
1575    if (virtio_64bit_features_needed(vdev)) {
1576        /*
1577         * Subsection load filled vdev->guest_features.  Run them
1578         * through virtio_set_features_nocheck to sanity-check them against
1579         * host_features.
1580         */
1581        uint64_t features64 = vdev->guest_features;
1582        if (virtio_set_features_nocheck(vdev, features64) < 0) {
1583            error_report("Features 0x%" PRIx64 " unsupported. "
1584                         "Allowed features: 0x%" PRIx64,
1585                         features64, vdev->host_features);
1586            return -1;
1587        }
1588    } else {
1589        if (virtio_set_features_nocheck(vdev, features) < 0) {
1590            error_report("Features 0x%x unsupported. "
1591                         "Allowed features: 0x%" PRIx64,
1592                         features, vdev->host_features);
1593            return -1;
1594        }
1595    }
1596
1597    for (i = 0; i < num; i++) {
1598        if (vdev->vq[i].vring.desc) {
1599            uint16_t nheads;
1600            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1601            /* Check it isn't doing strange things with descriptor numbers. */
1602            if (nheads > vdev->vq[i].vring.num) {
1603                error_report("VQ %d size 0x%x Guest index 0x%x "
1604                             "inconsistent with Host index 0x%x: delta 0x%x",
1605                             i, vdev->vq[i].vring.num,
1606                             vring_avail_idx(&vdev->vq[i]),
1607                             vdev->vq[i].last_avail_idx, nheads);
1608                return -1;
1609            }
1610            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
1611            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
1612        }
1613    }
1614
1615    return 0;
1616}
1617
1618void virtio_cleanup(VirtIODevice *vdev)
1619{
1620    qemu_del_vm_change_state_handler(vdev->vmstate);
1621    g_free(vdev->config);
1622    g_free(vdev->vq);
1623    g_free(vdev->vector_queues);
1624}
1625
1626static void virtio_vmstate_change(void *opaque, int running, RunState state)
1627{
1628    VirtIODevice *vdev = opaque;
1629    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1630    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1631    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1632    vdev->vm_running = running;
1633
1634    if (backend_run) {
1635        virtio_set_status(vdev, vdev->status);
1636    }
1637
1638    if (k->vmstate_change) {
1639        k->vmstate_change(qbus->parent, backend_run);
1640    }
1641
1642    if (!backend_run) {
1643        virtio_set_status(vdev, vdev->status);
1644    }
1645}
1646
1647void virtio_instance_init_common(Object *proxy_obj, void *data,
1648                                 size_t vdev_size, const char *vdev_name)
1649{
1650    DeviceState *vdev = data;
1651
1652    object_initialize(vdev, vdev_size, vdev_name);
1653    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1654    object_unref(OBJECT(vdev));
1655    qdev_alias_all_properties(vdev, proxy_obj);
1656}
1657
1658void virtio_init(VirtIODevice *vdev, const char *name,
1659                 uint16_t device_id, size_t config_size)
1660{
1661    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1662    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1663    int i;
1664    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
1665
1666    if (nvectors) {
1667        vdev->vector_queues =
1668            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
1669    }
1670
1671    vdev->device_id = device_id;
1672    vdev->status = 0;
1673    vdev->isr = 0;
1674    vdev->queue_sel = 0;
1675    vdev->config_vector = VIRTIO_NO_VECTOR;
1676    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
1677    vdev->vm_running = runstate_is_running();
1678    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1679        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1680        vdev->vq[i].vdev = vdev;
1681        vdev->vq[i].queue_index = i;
1682    }
1683
1684    vdev->name = name;
1685    vdev->config_len = config_size;
1686    if (vdev->config_len) {
1687        vdev->config = g_malloc0(config_size);
1688    } else {
1689        vdev->config = NULL;
1690    }
1691    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1692                                                     vdev);
1693    vdev->device_endian = virtio_default_endian();
1694    vdev->use_guest_notifier_mask = true;
1695}
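    /*
     * A typical realize path for a device (sketch; the device id, queue
     * size and my_handle_output are illustrative):
     *
     *   virtio_init(vdev, "my-device", VIRTIO_ID_..., sizeof(struct my_config));
     *   vq = virtio_add_queue(vdev, 128, my_handle_output);
     *
     * after which the transport sets up the rings and kicks arrive through
     * virtio_queue_notify().
     */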
1696
1697hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1698{
1699    return vdev->vq[n].vring.desc;
1700}
1701
1702hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1703{
1704    return vdev->vq[n].vring.avail;
1705}
1706
1707hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1708{
1709    return vdev->vq[n].vring.used;
1710}
1711
1712hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1713{
1714    return vdev->vq[n].vring.desc;
1715}
1716
1717hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1718{
1719    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1720}
1721
1722hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1723{
1724    return offsetof(VRingAvail, ring) +
1725        sizeof(uint16_t) * vdev->vq[n].vring.num;
1726}
1727
1728hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1729{
1730    return offsetof(VRingUsed, ring) +
1731        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1732}
1733
1734hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1735{
1736    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1737            virtio_queue_get_used_size(vdev, n);
1738}
1739
1740uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1741{
1742    return vdev->vq[n].last_avail_idx;
1743}
1744
1745void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1746{
1747    vdev->vq[n].last_avail_idx = idx;
1748    vdev->vq[n].shadow_avail_idx = idx;
1749}
1750
1751void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1752{
1753    vdev->vq[n].signalled_used_valid = false;
1754}
1755
1756VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1757{
1758    return vdev->vq + n;
1759}
1760
1761uint16_t virtio_get_queue_index(VirtQueue *vq)
1762{
1763    return vq->queue_index;
1764}
1765
1766static void virtio_queue_guest_notifier_read(EventNotifier *n)
1767{
1768    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1769    if (event_notifier_test_and_clear(n)) {
1770        virtio_irq(vq);
1771    }
1772}
1773
1774void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1775                                                bool with_irqfd)
1776{
1777    if (assign && !with_irqfd) {
1778        event_notifier_set_handler(&vq->guest_notifier, false,
1779                                   virtio_queue_guest_notifier_read);
1780    } else {
1781        event_notifier_set_handler(&vq->guest_notifier, false, NULL);
1782    }
1783    if (!assign) {
1784        /* Test and clear notifier before closing it,
1785         * in case poll callback didn't have time to run. */
1786        virtio_queue_guest_notifier_read(&vq->guest_notifier);
1787    }
1788}
1789
1790EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1791{
1792    return &vq->guest_notifier;
1793}
1794
1795static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
1796{
1797    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1798    if (event_notifier_test_and_clear(n)) {
1799        virtio_queue_notify_aio_vq(vq);
1800    }
1801}
1802
1803void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
1804                                                void (*handle_output)(VirtIODevice *,
1805                                                                      VirtQueue *))
1806{
1807    if (handle_output) {
1808        vq->handle_aio_output = handle_output;
1809        aio_set_event_notifier(ctx, &vq->host_notifier, true,
1810                               virtio_queue_host_notifier_aio_read);
1811    } else {
1812        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
1813        /* Test and clear notifier after disabling the event,
1814         * in case the poll callback didn't have time to run. */
1815        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
1816        vq->handle_aio_output = NULL;
1817    }
1818}
1819
1820static void virtio_queue_host_notifier_read(EventNotifier *n)
1821{
1822    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1823    if (event_notifier_test_and_clear(n)) {
1824        virtio_queue_notify_vq(vq);
1825    }
1826}
1827
1828void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1829                                               bool set_handler)
1830{
1831    if (assign && set_handler) {
1832        event_notifier_set_handler(&vq->host_notifier, true,
1833                                   virtio_queue_host_notifier_read);
1834    } else {
1835        event_notifier_set_handler(&vq->host_notifier, true, NULL);
1836    }
1837    if (!assign) {
1838        /* Test and clear notifier after disabling the event,
1839         * in case the poll callback didn't have time to run. */
1840        virtio_queue_host_notifier_read(&vq->host_notifier);
1841    }
1842}
1843
1844EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1845{
1846    return &vq->host_notifier;
1847}
1848
1849void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1850{
1851    g_free(vdev->bus_name);
1852    vdev->bus_name = g_strdup(bus_name);
1853}
1854
1855static void virtio_device_realize(DeviceState *dev, Error **errp)
1856{
1857    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1858    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1859    Error *err = NULL;
1860
1861    if (vdc->realize != NULL) {
1862        vdc->realize(dev, &err);
1863        if (err != NULL) {
1864            error_propagate(errp, err);
1865            return;
1866        }
1867    }
1868
1869    virtio_bus_device_plugged(vdev, &err);
1870    if (err != NULL) {
1871        error_propagate(errp, err);
1872        return;
1873    }
1874}
1875
1876static void virtio_device_unrealize(DeviceState *dev, Error **errp)
1877{
1878    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1879    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1880    Error *err = NULL;
1881
1882    virtio_bus_device_unplugged(vdev);
1883
1884    if (vdc->unrealize != NULL) {
1885        vdc->unrealize(dev, &err);
1886        if (err != NULL) {
1887            error_propagate(errp, err);
1888            return;
1889        }
1890    }
1891
1892    g_free(vdev->bus_name);
1893    vdev->bus_name = NULL;
1894}
1895
1896static Property virtio_properties[] = {
1897    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
1898    DEFINE_PROP_END_OF_LIST(),
1899};
1900
1901static void virtio_device_class_init(ObjectClass *klass, void *data)
1902{
1903    /* Set the default value here. */
1904    DeviceClass *dc = DEVICE_CLASS(klass);
1905
1906    dc->realize = virtio_device_realize;
1907    dc->unrealize = virtio_device_unrealize;
1908    dc->bus_type = TYPE_VIRTIO_BUS;
1909    dc->props = virtio_properties;
1910}
1911
1912static const TypeInfo virtio_device_info = {
1913    .name = TYPE_VIRTIO_DEVICE,
1914    .parent = TYPE_DEVICE,
1915    .instance_size = sizeof(VirtIODevice),
1916    .class_init = virtio_device_class_init,
1917    .abstract = true,
1918    .class_size = sizeof(VirtioDeviceClass),
1919};
1920
1921static void virtio_register_types(void)
1922{
1923    type_register_static(&virtio_device_info);
1924}
1925
1926type_init(virtio_register_types)
1927