qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "qemu-common.h"
  17#include "cpu.h"
  18#include "trace.h"
  19#include "exec/address-spaces.h"
  20#include "qemu/error-report.h"
  21#include "hw/virtio/virtio.h"
  22#include "qemu/atomic.h"
  23#include "hw/virtio/virtio-bus.h"
  24#include "migration/migration.h"
  25#include "hw/virtio/virtio-access.h"
  26
  27/*
  28 * The alignment to use between consumer and producer parts of vring.
  29 * x86 pagesize again. This is the default, used by transports like PCI
  30 * which don't provide a means for the guest to tell the host the alignment.
  31 */
  32#define VIRTIO_PCI_VRING_ALIGN         4096
  33
  34typedef struct VRingDesc
  35{
  36    uint64_t addr;
  37    uint32_t len;
  38    uint16_t flags;
  39    uint16_t next;
  40} VRingDesc;
  41
  42typedef struct VRingAvail
  43{
  44    uint16_t flags;
  45    uint16_t idx;
  46    uint16_t ring[0];
  47} VRingAvail;
  48
  49typedef struct VRingUsedElem
  50{
  51    uint32_t id;
  52    uint32_t len;
  53} VRingUsedElem;
  54
  55typedef struct VRingUsed
  56{
  57    uint16_t flags;
  58    uint16_t idx;
  59    VRingUsedElem ring[0];
  60} VRingUsed;
  61
  62typedef struct VRing
  63{
  64    unsigned int num;
  65    unsigned int num_default;
  66    unsigned int align;
  67    hwaddr desc;
  68    hwaddr avail;
  69    hwaddr used;
  70} VRing;
  71
  72struct VirtQueue
  73{
  74    VRing vring;
  75
  76    /* Next head to pop */
  77    uint16_t last_avail_idx;
  78
  79    /* Last avail_idx read from VQ. */
  80    uint16_t shadow_avail_idx;
  81
  82    uint16_t used_idx;
  83
  84    /* Last used index value we have signalled on */
  85    uint16_t signalled_used;
  86
   87    /* Whether signalled_used is valid; when false we always notify */
  88    bool signalled_used_valid;
  89
  90    /* Notification enabled? */
  91    bool notification;
  92
  93    uint16_t queue_index;
  94
  95    int inuse;
  96
  97    uint16_t vector;
  98    VirtIOHandleOutput handle_output;
  99    VirtIOHandleOutput handle_aio_output;
 100    bool use_aio;
 101    VirtIODevice *vdev;
 102    EventNotifier guest_notifier;
 103    EventNotifier host_notifier;
 104    QLIST_ENTRY(VirtQueue) node;
 105};
 106
 107/* virt queue functions */
 108void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 109{
 110    VRing *vring = &vdev->vq[n].vring;
 111
 112    if (!vring->desc) {
 113        /* not yet setup -> nothing to do */
 114        return;
 115    }
 116    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 117    vring->used = vring_align(vring->avail +
 118                              offsetof(VRingAvail, ring[vring->num]),
 119                              vring->align);
 120}
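
/*
 * Layout sketch (illustrative, not part of the original source): for a
 * hypothetical queue of 256 descriptors with the default 4096-byte
 * alignment, the split ring computed above works out to:
 *
 *   desc  = base                   (256 * sizeof(VRingDesc) = 4096 bytes)
 *   avail = base + 4096            (offsetof(VRingAvail, ring[256]) = 516 bytes)
 *   used  = vring_align(base + 4612, 4096) = base + 8192
 *
 * Legacy transports only tell us the descriptor table address; avail and
 * used are derived from it as shown here.
 */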
 121
 122static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 123                            hwaddr desc_pa, int i)
 124{
 125    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
 126                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
 127    virtio_tswap64s(vdev, &desc->addr);
 128    virtio_tswap32s(vdev, &desc->len);
 129    virtio_tswap16s(vdev, &desc->flags);
 130    virtio_tswap16s(vdev, &desc->next);
 131}
 132
 133static inline uint16_t vring_avail_flags(VirtQueue *vq)
 134{
 135    hwaddr pa;
 136    pa = vq->vring.avail + offsetof(VRingAvail, flags);
 137    return virtio_lduw_phys(vq->vdev, pa);
 138}
 139
 140static inline uint16_t vring_avail_idx(VirtQueue *vq)
 141{
 142    hwaddr pa;
 143    pa = vq->vring.avail + offsetof(VRingAvail, idx);
 144    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
 145    return vq->shadow_avail_idx;
 146}
 147
 148static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 149{
 150    hwaddr pa;
 151    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
 152    return virtio_lduw_phys(vq->vdev, pa);
 153}
 154
 155static inline uint16_t vring_get_used_event(VirtQueue *vq)
 156{
 157    return vring_avail_ring(vq, vq->vring.num);
 158}
 159
 160static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 161                                    int i)
 162{
 163    hwaddr pa;
 164    virtio_tswap32s(vq->vdev, &uelem->id);
 165    virtio_tswap32s(vq->vdev, &uelem->len);
 166    pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
 167    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
 168                       (void *)uelem, sizeof(VRingUsedElem));
 169}
 170
 171static uint16_t vring_used_idx(VirtQueue *vq)
 172{
 173    hwaddr pa;
 174    pa = vq->vring.used + offsetof(VRingUsed, idx);
 175    return virtio_lduw_phys(vq->vdev, pa);
 176}
 177
 178static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 179{
 180    hwaddr pa;
 181    pa = vq->vring.used + offsetof(VRingUsed, idx);
 182    virtio_stw_phys(vq->vdev, pa, val);
 183    vq->used_idx = val;
 184}
 185
 186static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 187{
 188    VirtIODevice *vdev = vq->vdev;
 189    hwaddr pa;
 190    pa = vq->vring.used + offsetof(VRingUsed, flags);
 191    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
 192}
 193
 194static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 195{
 196    VirtIODevice *vdev = vq->vdev;
 197    hwaddr pa;
 198    pa = vq->vring.used + offsetof(VRingUsed, flags);
 199    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
 200}
 201
 202static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 203{
 204    hwaddr pa;
 205    if (!vq->notification) {
 206        return;
 207    }
 208    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
 209    virtio_stw_phys(vq->vdev, pa, val);
 210}
 211
 212void virtio_queue_set_notification(VirtQueue *vq, int enable)
 213{
 214    vq->notification = enable;
 215    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 216        vring_set_avail_event(vq, vring_avail_idx(vq));
 217    } else if (enable) {
 218        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 219    } else {
 220        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 221    }
 222    if (enable) {
 223        /* Expose avail event/used flags before caller checks the avail idx. */
 224        smp_mb();
 225    }
 226}
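
/*
 * Usage sketch (illustrative): a handler typically pairs this with
 * virtqueue_pop() to suppress redundant guest kicks while it drains the
 * queue, then re-checks after re-enabling to close the race with a guest
 * that added buffers in the meantime:
 *
 *   do {
 *       virtio_queue_set_notification(vq, 0);
 *       while ((elem = virtqueue_pop(vq, sizeof(*elem)))) {
 *           ... process elem, virtqueue_push(), virtio_notify() ...
 *       }
 *       virtio_queue_set_notification(vq, 1);
 *   } while (!virtio_queue_empty(vq));
 */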
 227
 228int virtio_queue_ready(VirtQueue *vq)
 229{
 230    return vq->vring.avail != 0;
 231}
 232
 233/* Fetch avail_idx from VQ memory only when we really need to know if
 234 * guest has added some buffers. */
 235int virtio_queue_empty(VirtQueue *vq)
 236{
 237    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 238        return 0;
 239    }
 240
 241    return vring_avail_idx(vq) == vq->last_avail_idx;
 242}
 243
 244static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
 245                               unsigned int len)
 246{
 247    unsigned int offset;
 248    int i;
 249
 250    offset = 0;
 251    for (i = 0; i < elem->in_num; i++) {
 252        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 253
 254        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
 255                                  elem->in_sg[i].iov_len,
 256                                  1, size);
 257
 258        offset += size;
 259    }
 260
 261    for (i = 0; i < elem->out_num; i++)
 262        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
 263                                  elem->out_sg[i].iov_len,
 264                                  0, elem->out_sg[i].iov_len);
 265}
 266
 267void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
 268                       unsigned int len)
 269{
 270    vq->last_avail_idx--;
 271    vq->inuse--;
 272    virtqueue_unmap_sg(vq, elem, len);
 273}
 274
 275void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 276                    unsigned int len, unsigned int idx)
 277{
 278    VRingUsedElem uelem;
 279
 280    trace_virtqueue_fill(vq, elem, len, idx);
 281
 282    virtqueue_unmap_sg(vq, elem, len);
 283
 284    idx = (idx + vq->used_idx) % vq->vring.num;
 285
 286    uelem.id = elem->index;
 287    uelem.len = len;
 288    vring_used_write(vq, &uelem, idx);
 289}
 290
 291void virtqueue_flush(VirtQueue *vq, unsigned int count)
 292{
 293    uint16_t old, new;
 294    /* Make sure buffer is written before we update index. */
 295    smp_wmb();
 296    trace_virtqueue_flush(vq, count);
 297    old = vq->used_idx;
 298    new = old + count;
 299    vring_used_idx_set(vq, new);
 300    vq->inuse -= count;
 301    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
 302        vq->signalled_used_valid = false;
 303}
 304
 305void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 306                    unsigned int len)
 307{
 308    virtqueue_fill(vq, elem, len, 0);
 309    virtqueue_flush(vq, 1);
 310}
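
/*
 * Usage sketch (illustrative): virtqueue_push() is the single-element case
 * of fill + flush.  A device that completes several elements at once can
 * batch them behind one used index update, e.g. with hypothetical
 * elems[]/lens[] arrays owned by the device:
 *
 *   for (i = 0; i < n; i++) {
 *       virtqueue_fill(vq, elems[i], lens[i], i);
 *   }
 *   virtqueue_flush(vq, n);
 *   virtio_notify(vdev, vq);
 */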
 311
 312static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 313{
 314    uint16_t num_heads = vring_avail_idx(vq) - idx;
 315
 316    /* Check it isn't doing very strange things with descriptor numbers. */
 317    if (num_heads > vq->vring.num) {
 318        error_report("Guest moved used index from %u to %u",
 319                     idx, vq->shadow_avail_idx);
 320        exit(1);
 321    }
 322    /* On success, callers read a descriptor at vq->last_avail_idx.
 323     * Make sure descriptor read does not bypass avail index read. */
 324    if (num_heads) {
 325        smp_rmb();
 326    }
 327
 328    return num_heads;
 329}
 330
 331static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
 332{
 333    unsigned int head;
 334
 335    /* Grab the next descriptor number they're advertising, and increment
 336     * the index we've seen. */
 337    head = vring_avail_ring(vq, idx % vq->vring.num);
 338
 339    /* If their number is silly, that's a fatal mistake. */
 340    if (head >= vq->vring.num) {
 341        error_report("Guest says index %u is available", head);
 342        exit(1);
 343    }
 344
 345    return head;
 346}
 347
 348static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
 349                                         hwaddr desc_pa, unsigned int max)
 350{
 351    unsigned int next;
 352
 353    /* If this descriptor says it doesn't chain, we're done. */
 354    if (!(desc->flags & VRING_DESC_F_NEXT)) {
 355        return max;
 356    }
 357
 358    /* Check they're not leading us off end of descriptors. */
 359    next = desc->next;
 360    /* Make sure compiler knows to grab that: we don't want it changing! */
 361    smp_wmb();
 362
 363    if (next >= max) {
 364        error_report("Desc next is %u", next);
 365        exit(1);
 366    }
 367
 368    vring_desc_read(vdev, desc, desc_pa, next);
 369    return next;
 370}
 371
 372void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
 373                               unsigned int *out_bytes,
 374                               unsigned max_in_bytes, unsigned max_out_bytes)
 375{
 376    unsigned int idx;
 377    unsigned int total_bufs, in_total, out_total;
 378
 379    idx = vq->last_avail_idx;
 380
 381    total_bufs = in_total = out_total = 0;
 382    while (virtqueue_num_heads(vq, idx)) {
 383        VirtIODevice *vdev = vq->vdev;
 384        unsigned int max, num_bufs, indirect = 0;
 385        VRingDesc desc;
 386        hwaddr desc_pa;
 387        int i;
 388
 389        max = vq->vring.num;
 390        num_bufs = total_bufs;
 391        i = virtqueue_get_head(vq, idx++);
 392        desc_pa = vq->vring.desc;
 393        vring_desc_read(vdev, &desc, desc_pa, i);
 394
 395        if (desc.flags & VRING_DESC_F_INDIRECT) {
 396            if (desc.len % sizeof(VRingDesc)) {
 397                error_report("Invalid size for indirect buffer table");
 398                exit(1);
 399            }
 400
 401            /* If we've got too many, that implies a descriptor loop. */
 402            if (num_bufs >= max) {
 403                error_report("Looped descriptor");
 404                exit(1);
 405            }
 406
 407            /* loop over the indirect descriptor table */
 408            indirect = 1;
 409            max = desc.len / sizeof(VRingDesc);
 410            desc_pa = desc.addr;
 411            num_bufs = i = 0;
 412            vring_desc_read(vdev, &desc, desc_pa, i);
 413        }
 414
 415        do {
 416            /* If we've got too many, that implies a descriptor loop. */
 417            if (++num_bufs > max) {
 418                error_report("Looped descriptor");
 419                exit(1);
 420            }
 421
 422            if (desc.flags & VRING_DESC_F_WRITE) {
 423                in_total += desc.len;
 424            } else {
 425                out_total += desc.len;
 426            }
 427            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
 428                goto done;
 429            }
 430        } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
 431
 432        if (!indirect)
 433            total_bufs = num_bufs;
 434        else
 435            total_bufs++;
 436    }
 437done:
 438    if (in_bytes) {
 439        *in_bytes = in_total;
 440    }
 441    if (out_bytes) {
 442        *out_bytes = out_total;
 443    }
 444}
 445
 446int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
 447                          unsigned int out_bytes)
 448{
 449    unsigned int in_total, out_total;
 450
 451    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
 452    return in_bytes <= in_total && out_bytes <= out_total;
 453}
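
/*
 * Usage sketch (illustrative): a device that needs, say, at least 1536
 * bytes of guest-writable buffer space before committing to a pop can ask:
 *
 *   if (!virtqueue_avail_bytes(vq, 1536, 0)) {
 *       return;
 *   }
 *   elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *
 * in_bytes counts VRING_DESC_F_WRITE (device-writable) descriptors and
 * out_bytes counts the device-readable ones.
 */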
 454
 455static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
 456                               unsigned int max_num_sg, bool is_write,
 457                               hwaddr pa, size_t sz)
 458{
 459    unsigned num_sg = *p_num_sg;
 460    assert(num_sg <= max_num_sg);
 461
 462    if (!sz) {
 463        error_report("virtio: zero sized buffers are not allowed");
 464        exit(1);
 465    }
 466
 467    while (sz) {
 468        hwaddr len = sz;
 469
 470        if (num_sg == max_num_sg) {
 471            error_report("virtio: too many write descriptors in indirect table");
 472            exit(1);
 473        }
 474
 475        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
 476        iov[num_sg].iov_len = len;
 477        addr[num_sg] = pa;
 478
 479        sz -= len;
 480        pa += len;
 481        num_sg++;
 482    }
 483    *p_num_sg = num_sg;
 484}
 485
 486static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
 487                                unsigned int *num_sg, unsigned int max_size,
 488                                int is_write)
 489{
 490    unsigned int i;
 491    hwaddr len;
 492
 493    /* Note: this function MUST validate input, some callers
 494     * are passing in num_sg values received over the network.
 495     */
 496    /* TODO: teach all callers that this can fail, and return failure instead
 497     * of asserting here.
 498     * When we do, we might be able to re-enable NDEBUG below.
 499     */
 500#ifdef NDEBUG
 501#error building with NDEBUG is not supported
 502#endif
 503    assert(*num_sg <= max_size);
 504
 505    for (i = 0; i < *num_sg; i++) {
 506        len = sg[i].iov_len;
 507        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
 508        if (!sg[i].iov_base) {
 509            error_report("virtio: error trying to map MMIO memory");
 510            exit(1);
 511        }
 512        if (len != sg[i].iov_len) {
 513            error_report("virtio: unexpected memory split");
 514            exit(1);
 515        }
 516    }
 517}
 518
 519void virtqueue_map(VirtQueueElement *elem)
 520{
 521    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
 522                        VIRTQUEUE_MAX_SIZE, 1);
 523    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
 524                        VIRTQUEUE_MAX_SIZE, 0);
 525}
 526
 527void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
 528{
 529    VirtQueueElement *elem;
 530    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
 531    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
 532    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
 533    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
 534    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
 535    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
 536
 537    assert(sz >= sizeof(VirtQueueElement));
 538    elem = g_malloc(out_sg_end);
 539    elem->out_num = out_num;
 540    elem->in_num = in_num;
 541    elem->in_addr = (void *)elem + in_addr_ofs;
 542    elem->out_addr = (void *)elem + out_addr_ofs;
 543    elem->in_sg = (void *)elem + in_sg_ofs;
 544    elem->out_sg = (void *)elem + out_sg_ofs;
 545    return elem;
 546}
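
/*
 * Memory layout sketch (illustrative): the element and its variable-sized
 * arrays live in a single allocation.  For a hypothetical device struct
 * MyReq that embeds VirtQueueElement as its first member, sz would be
 * sizeof(MyReq) and the arrays are appended after it, each aligned to its
 * own type:
 *
 *   [MyReq (starts with VirtQueueElement)]
 *   [in_addr:  in_num  * hwaddr        ]
 *   [out_addr: out_num * hwaddr        ]
 *   [in_sg:    in_num  * struct iovec  ]
 *   [out_sg:   out_num * struct iovec  ]
 *
 * This is why callers pass sz to virtqueue_pop() and
 * qemu_get_virtqueue_element() instead of allocating the element themselves.
 */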
 547
 548void *virtqueue_pop(VirtQueue *vq, size_t sz)
 549{
 550    unsigned int i, head, max;
 551    hwaddr desc_pa = vq->vring.desc;
 552    VirtIODevice *vdev = vq->vdev;
 553    VirtQueueElement *elem;
 554    unsigned out_num, in_num;
 555    hwaddr addr[VIRTQUEUE_MAX_SIZE];
 556    struct iovec iov[VIRTQUEUE_MAX_SIZE];
 557    VRingDesc desc;
 558
 559    if (virtio_queue_empty(vq)) {
 560        return NULL;
 561    }
 562    /* Needed after virtio_queue_empty(), see comment in
 563     * virtqueue_num_heads(). */
 564    smp_rmb();
 565
  566    /* When we start there are neither input nor output buffers. */
 567    out_num = in_num = 0;
 568
 569    max = vq->vring.num;
 570
 571    if (vq->inuse >= vq->vring.num) {
 572        error_report("Virtqueue size exceeded");
 573        exit(1);
 574    }
 575
 576    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
 577    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
 578        vring_set_avail_event(vq, vq->last_avail_idx);
 579    }
 580
 581    vring_desc_read(vdev, &desc, desc_pa, i);
 582    if (desc.flags & VRING_DESC_F_INDIRECT) {
 583        if (desc.len % sizeof(VRingDesc)) {
 584            error_report("Invalid size for indirect buffer table");
 585            exit(1);
 586        }
 587
 588        /* loop over the indirect descriptor table */
 589        max = desc.len / sizeof(VRingDesc);
 590        desc_pa = desc.addr;
 591        i = 0;
 592        vring_desc_read(vdev, &desc, desc_pa, i);
 593    }
 594
 595    /* Collect all the descriptors */
 596    do {
 597        if (desc.flags & VRING_DESC_F_WRITE) {
 598            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
 599                               VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
 600        } else {
 601            if (in_num) {
 602                error_report("Incorrect order for descriptors");
 603                exit(1);
 604            }
 605            virtqueue_map_desc(&out_num, addr, iov,
 606                               VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
 607        }
 608
 609        /* If we've got too many, that implies a descriptor loop. */
 610        if ((in_num + out_num) > max) {
 611            error_report("Looped descriptor");
 612            exit(1);
 613        }
 614    } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
 615
 616    /* Now copy what we have collected and mapped */
 617    elem = virtqueue_alloc_element(sz, out_num, in_num);
 618    elem->index = head;
 619    for (i = 0; i < out_num; i++) {
 620        elem->out_addr[i] = addr[i];
 621        elem->out_sg[i] = iov[i];
 622    }
 623    for (i = 0; i < in_num; i++) {
 624        elem->in_addr[i] = addr[out_num + i];
 625        elem->in_sg[i] = iov[out_num + i];
 626    }
 627
 628    vq->inuse++;
 629
 630    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
 631    return elem;
 632}
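
/*
 * Usage sketch (illustrative, not a handler from this file): the
 * pop/process/push cycle a typical handle_output callback performs, where
 * my_handle_output() and process() are hypothetical and "done" is the
 * number of bytes written into the guest-writable in_sg buffers:
 *
 *   static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *   {
 *       VirtQueueElement *elem;
 *
 *       while ((elem = virtqueue_pop(vq, sizeof(*elem)))) {
 *           size_t done = process(elem->out_sg, elem->out_num,
 *                                 elem->in_sg, elem->in_num);
 *           virtqueue_push(vq, elem, done);
 *           g_free(elem);
 *       }
 *       virtio_notify(vdev, vq);
 *   }
 */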
 633
 634/* Reading and writing a structure directly to QEMUFile is *awful*, but
 635 * it is what QEMU has always done by mistake.  We can change it sooner
 636 * or later by bumping the version number of the affected vm states.
 637 * In the meanwhile, since the in-memory layout of VirtQueueElement
 638 * has changed, we need to marshal to and from the layout that was
 639 * used before the change.
 640 */
 641typedef struct VirtQueueElementOld {
 642    unsigned int index;
 643    unsigned int out_num;
 644    unsigned int in_num;
 645    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
 646    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
 647    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
 648    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
 649} VirtQueueElementOld;
 650
 651void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
 652{
 653    VirtQueueElement *elem;
 654    VirtQueueElementOld data;
 655    int i;
 656
 657    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
 658
 659    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
 660    elem->index = data.index;
 661
 662    for (i = 0; i < elem->in_num; i++) {
 663        elem->in_addr[i] = data.in_addr[i];
 664    }
 665
 666    for (i = 0; i < elem->out_num; i++) {
 667        elem->out_addr[i] = data.out_addr[i];
 668    }
 669
 670    for (i = 0; i < elem->in_num; i++) {
 671        /* Base is overwritten by virtqueue_map.  */
 672        elem->in_sg[i].iov_base = 0;
 673        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
 674    }
 675
 676    for (i = 0; i < elem->out_num; i++) {
 677        /* Base is overwritten by virtqueue_map.  */
 678        elem->out_sg[i].iov_base = 0;
 679        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
 680    }
 681
 682    virtqueue_map(elem);
 683    return elem;
 684}
 685
 686void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
 687{
 688    VirtQueueElementOld data;
 689    int i;
 690
 691    memset(&data, 0, sizeof(data));
 692    data.index = elem->index;
 693    data.in_num = elem->in_num;
 694    data.out_num = elem->out_num;
 695
 696    for (i = 0; i < elem->in_num; i++) {
 697        data.in_addr[i] = elem->in_addr[i];
 698    }
 699
 700    for (i = 0; i < elem->out_num; i++) {
 701        data.out_addr[i] = elem->out_addr[i];
 702    }
 703
 704    for (i = 0; i < elem->in_num; i++) {
 705        /* Base is overwritten by virtqueue_map when loading.  Do not
 706         * save it, as it would leak the QEMU address space layout.  */
 707        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
 708    }
 709
 710    for (i = 0; i < elem->out_num; i++) {
 711        /* Do not save iov_base as above.  */
 712        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
 713    }
 714    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
 715}
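
/*
 * Usage sketch (illustrative): a device that migrates in-flight requests
 * saves each popped element with qemu_put_virtqueue_element() and restores
 * it with qemu_get_virtqueue_element(), which re-runs virtqueue_map() so
 * the iov_base pointers are valid again in the destination process.  With
 * a hypothetical per-request structure req:
 *
 *   save:  qemu_put_virtqueue_element(f, req->elem);
 *   load:  req->elem = qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
 */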
 716
 717/* virtio device */
 718static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
 719{
 720    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
 721    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 722
 723    if (k->notify) {
 724        k->notify(qbus->parent, vector);
 725    }
 726}
 727
 728void virtio_update_irq(VirtIODevice *vdev)
 729{
 730    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 731}
 732
 733static int virtio_validate_features(VirtIODevice *vdev)
 734{
 735    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 736
 737    if (k->validate_features) {
 738        return k->validate_features(vdev);
 739    } else {
 740        return 0;
 741    }
 742}
 743
 744int virtio_set_status(VirtIODevice *vdev, uint8_t val)
 745{
 746    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 747    trace_virtio_set_status(vdev, val);
 748
 749    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
 750        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
 751            val & VIRTIO_CONFIG_S_FEATURES_OK) {
 752            int ret = virtio_validate_features(vdev);
 753
 754            if (ret) {
 755                return ret;
 756            }
 757        }
 758    }
 759    if (k->set_status) {
 760        k->set_status(vdev, val);
 761    }
 762    vdev->status = val;
 763    return 0;
 764}
 765
 766bool target_words_bigendian(void);
 767static enum virtio_device_endian virtio_default_endian(void)
 768{
 769    if (target_words_bigendian()) {
 770        return VIRTIO_DEVICE_ENDIAN_BIG;
 771    } else {
 772        return VIRTIO_DEVICE_ENDIAN_LITTLE;
 773    }
 774}
 775
 776static enum virtio_device_endian virtio_current_cpu_endian(void)
 777{
 778    CPUClass *cc = CPU_GET_CLASS(current_cpu);
 779
 780    if (cc->virtio_is_big_endian(current_cpu)) {
 781        return VIRTIO_DEVICE_ENDIAN_BIG;
 782    } else {
 783        return VIRTIO_DEVICE_ENDIAN_LITTLE;
 784    }
 785}
 786
 787void virtio_reset(void *opaque)
 788{
 789    VirtIODevice *vdev = opaque;
 790    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 791    int i;
 792
 793    virtio_set_status(vdev, 0);
 794    if (current_cpu) {
 795        /* Guest initiated reset */
 796        vdev->device_endian = virtio_current_cpu_endian();
 797    } else {
 798        /* System reset */
 799        vdev->device_endian = virtio_default_endian();
 800    }
 801
 802    if (k->reset) {
 803        k->reset(vdev);
 804    }
 805
 806    vdev->guest_features = 0;
 807    vdev->queue_sel = 0;
 808    vdev->status = 0;
 809    vdev->isr = 0;
 810    vdev->config_vector = VIRTIO_NO_VECTOR;
 811    virtio_notify_vector(vdev, vdev->config_vector);
 812
  813    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
 814        vdev->vq[i].vring.desc = 0;
 815        vdev->vq[i].vring.avail = 0;
 816        vdev->vq[i].vring.used = 0;
 817        vdev->vq[i].last_avail_idx = 0;
 818        vdev->vq[i].shadow_avail_idx = 0;
 819        vdev->vq[i].used_idx = 0;
 820        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
 821        vdev->vq[i].signalled_used = 0;
 822        vdev->vq[i].signalled_used_valid = false;
 823        vdev->vq[i].notification = true;
 824        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
 825    }
 826}
 827
 828uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
 829{
 830    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 831    uint8_t val;
 832
 833    if (addr + sizeof(val) > vdev->config_len) {
 834        return (uint32_t)-1;
 835    }
 836
 837    k->get_config(vdev, vdev->config);
 838
 839    val = ldub_p(vdev->config + addr);
 840    return val;
 841}
 842
 843uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
 844{
 845    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 846    uint16_t val;
 847
 848    if (addr + sizeof(val) > vdev->config_len) {
 849        return (uint32_t)-1;
 850    }
 851
 852    k->get_config(vdev, vdev->config);
 853
 854    val = lduw_p(vdev->config + addr);
 855    return val;
 856}
 857
 858uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
 859{
 860    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 861    uint32_t val;
 862
 863    if (addr + sizeof(val) > vdev->config_len) {
 864        return (uint32_t)-1;
 865    }
 866
 867    k->get_config(vdev, vdev->config);
 868
 869    val = ldl_p(vdev->config + addr);
 870    return val;
 871}
 872
 873void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 874{
 875    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 876    uint8_t val = data;
 877
 878    if (addr + sizeof(val) > vdev->config_len) {
 879        return;
 880    }
 881
 882    stb_p(vdev->config + addr, val);
 883
 884    if (k->set_config) {
 885        k->set_config(vdev, vdev->config);
 886    }
 887}
 888
 889void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 890{
 891    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 892    uint16_t val = data;
 893
 894    if (addr + sizeof(val) > vdev->config_len) {
 895        return;
 896    }
 897
 898    stw_p(vdev->config + addr, val);
 899
 900    if (k->set_config) {
 901        k->set_config(vdev, vdev->config);
 902    }
 903}
 904
 905void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 906{
 907    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 908    uint32_t val = data;
 909
 910    if (addr + sizeof(val) > vdev->config_len) {
 911        return;
 912    }
 913
 914    stl_p(vdev->config + addr, val);
 915
 916    if (k->set_config) {
 917        k->set_config(vdev, vdev->config);
 918    }
 919}
 920
 921uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
 922{
 923    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 924    uint8_t val;
 925
 926    if (addr + sizeof(val) > vdev->config_len) {
 927        return (uint32_t)-1;
 928    }
 929
 930    k->get_config(vdev, vdev->config);
 931
 932    val = ldub_p(vdev->config + addr);
 933    return val;
 934}
 935
 936uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
 937{
 938    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 939    uint16_t val;
 940
 941    if (addr + sizeof(val) > vdev->config_len) {
 942        return (uint32_t)-1;
 943    }
 944
 945    k->get_config(vdev, vdev->config);
 946
 947    val = lduw_le_p(vdev->config + addr);
 948    return val;
 949}
 950
 951uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
 952{
 953    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 954    uint32_t val;
 955
 956    if (addr + sizeof(val) > vdev->config_len) {
 957        return (uint32_t)-1;
 958    }
 959
 960    k->get_config(vdev, vdev->config);
 961
 962    val = ldl_le_p(vdev->config + addr);
 963    return val;
 964}
 965
 966void virtio_config_modern_writeb(VirtIODevice *vdev,
 967                                 uint32_t addr, uint32_t data)
 968{
 969    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 970    uint8_t val = data;
 971
 972    if (addr + sizeof(val) > vdev->config_len) {
 973        return;
 974    }
 975
 976    stb_p(vdev->config + addr, val);
 977
 978    if (k->set_config) {
 979        k->set_config(vdev, vdev->config);
 980    }
 981}
 982
 983void virtio_config_modern_writew(VirtIODevice *vdev,
 984                                 uint32_t addr, uint32_t data)
 985{
 986    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 987    uint16_t val = data;
 988
 989    if (addr + sizeof(val) > vdev->config_len) {
 990        return;
 991    }
 992
 993    stw_le_p(vdev->config + addr, val);
 994
 995    if (k->set_config) {
 996        k->set_config(vdev, vdev->config);
 997    }
 998}
 999
1000void virtio_config_modern_writel(VirtIODevice *vdev,
1001                                 uint32_t addr, uint32_t data)
1002{
1003    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1004    uint32_t val = data;
1005
1006    if (addr + sizeof(val) > vdev->config_len) {
1007        return;
1008    }
1009
1010    stl_le_p(vdev->config + addr, val);
1011
1012    if (k->set_config) {
1013        k->set_config(vdev, vdev->config);
1014    }
1015}
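
/*
 * Device-side sketch (illustrative): the accessors above refresh the shadow
 * vdev->config buffer through the class hooks, so a device only serializes
 * its state into that buffer.  With a hypothetical my_get_config() and
 * struct my_config:
 *
 *   static void my_get_config(VirtIODevice *vdev, uint8_t *config)
 *   {
 *       struct my_config cfg = { ... };
 *       memcpy(config, &cfg, sizeof(cfg));
 *   }
 *
 * The legacy virtio_config_readX/writeX variants use the guest's natural
 * endianness, while the _modern_ ones are always little-endian per
 * VIRTIO 1.0.
 */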
1016
1017void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
1018{
1019    vdev->vq[n].vring.desc = addr;
1020    virtio_queue_update_rings(vdev, n);
1021}
1022
1023hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
1024{
1025    return vdev->vq[n].vring.desc;
1026}
1027
1028void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
1029                            hwaddr avail, hwaddr used)
1030{
1031    vdev->vq[n].vring.desc = desc;
1032    vdev->vq[n].vring.avail = avail;
1033    vdev->vq[n].vring.used = used;
1034}
1035
1036void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
1037{
1038    /* Don't allow guest to flip queue between existent and
1039     * nonexistent states, or to set it to an invalid size.
1040     */
1041    if (!!num != !!vdev->vq[n].vring.num ||
1042        num > VIRTQUEUE_MAX_SIZE ||
1043        num < 0) {
1044        return;
1045    }
1046    vdev->vq[n].vring.num = num;
1047}
1048
1049VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
1050{
1051    return QLIST_FIRST(&vdev->vector_queues[vector]);
1052}
1053
1054VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
1055{
1056    return QLIST_NEXT(vq, node);
1057}
1058
1059int virtio_queue_get_num(VirtIODevice *vdev, int n)
1060{
1061    return vdev->vq[n].vring.num;
1062}
1063
1064int virtio_get_num_queues(VirtIODevice *vdev)
1065{
1066    int i;
1067
1068    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1069        if (!virtio_queue_get_num(vdev, i)) {
1070            break;
1071        }
1072    }
1073
1074    return i;
1075}
1076
1077void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
1078{
1079    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1080    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1081
1082    /* virtio-1 compliant devices cannot change the alignment */
1083    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1084        error_report("tried to modify queue alignment for virtio-1 device");
1085        return;
1086    }
1087    /* Check that the transport told us it was going to do this
1088     * (so a buggy transport will immediately assert rather than
1089     * silently failing to migrate this state)
1090     */
1091    assert(k->has_variable_vring_alignment);
1092
1093    vdev->vq[n].vring.align = align;
1094    virtio_queue_update_rings(vdev, n);
1095}
1096
1097static void virtio_queue_notify_aio_vq(VirtQueue *vq)
1098{
1099    if (vq->vring.desc && vq->handle_aio_output) {
1100        VirtIODevice *vdev = vq->vdev;
1101
1102        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1103        vq->handle_aio_output(vdev, vq);
1104    }
1105}
1106
1107static void virtio_queue_notify_vq(VirtQueue *vq)
1108{
1109    if (vq->vring.desc && vq->handle_output) {
1110        VirtIODevice *vdev = vq->vdev;
1111
1112        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1113        vq->handle_output(vdev, vq);
1114    }
1115}
1116
1117void virtio_queue_notify(VirtIODevice *vdev, int n)
1118{
1119    virtio_queue_notify_vq(&vdev->vq[n]);
1120}
1121
1122uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
1123{
1124    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
1125        VIRTIO_NO_VECTOR;
1126}
1127
1128void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
1129{
1130    VirtQueue *vq = &vdev->vq[n];
1131
1132    if (n < VIRTIO_QUEUE_MAX) {
1133        if (vdev->vector_queues &&
1134            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
1135            QLIST_REMOVE(vq, node);
1136        }
1137        vdev->vq[n].vector = vector;
1138        if (vdev->vector_queues &&
1139            vector != VIRTIO_NO_VECTOR) {
1140            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
1141        }
1142    }
1143}
1144
1145static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size,
1146                                            VirtIOHandleOutput handle_output,
1147                                            bool use_aio)
1148{
1149    int i;
1150
1151    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1152        if (vdev->vq[i].vring.num == 0)
1153            break;
1154    }
1155
1156    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
1157        abort();
1158
1159    vdev->vq[i].vring.num = queue_size;
1160    vdev->vq[i].vring.num_default = queue_size;
1161    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
1162    vdev->vq[i].handle_output = handle_output;
1163    vdev->vq[i].handle_aio_output = NULL;
1164    vdev->vq[i].use_aio = use_aio;
1165
1166    return &vdev->vq[i];
1167}
1168
1169/* Add a virt queue and mark AIO.
1170 * An AIO queue will use the AioContext based event interface instead of the
1171 * default IOHandler and EventNotifier interface.
1172 */
1173VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size,
1174                                VirtIOHandleOutput handle_output)
1175{
1176    return virtio_add_queue_internal(vdev, queue_size, handle_output, true);
1177}
1178
 1179/* Add a normal virt queue (as opposed to the AIO version above). */
1180VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
1181                            VirtIOHandleOutput handle_output)
1182{
1183    return virtio_add_queue_internal(vdev, queue_size, handle_output, false);
1184}
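
/*
 * Usage sketch (illustrative): a device's realize hook typically creates
 * its queues right after virtio_init(); "my-device", MY_DEVICE_ID,
 * struct my_config and my_handle_output are hypothetical:
 *
 *   virtio_init(vdev, "my-device", MY_DEVICE_ID, sizeof(struct my_config));
 *   s->req_vq = virtio_add_queue(vdev, 128, my_handle_output);
 *
 * 128 is the queue size and must not exceed VIRTQUEUE_MAX_SIZE.
 */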
1185
1186void virtio_del_queue(VirtIODevice *vdev, int n)
1187{
1188    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
1189        abort();
1190    }
1191
1192    vdev->vq[n].vring.num = 0;
1193    vdev->vq[n].vring.num_default = 0;
1194}
1195
1196void virtio_irq(VirtQueue *vq)
1197{
1198    trace_virtio_irq(vq);
1199    vq->vdev->isr |= 0x01;
1200    virtio_notify_vector(vq->vdev, vq->vector);
1201}
1202
1203bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
1204{
1205    uint16_t old, new;
1206    bool v;
1207    /* We need to expose used array entries before checking used event. */
1208    smp_mb();
 1209    /* Always notify when the queue is empty, if VIRTIO_F_NOTIFY_ON_EMPTY was negotiated */
1210    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1211        !vq->inuse && virtio_queue_empty(vq)) {
1212        return true;
1213    }
1214
1215    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1216        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
1217    }
1218
1219    v = vq->signalled_used_valid;
1220    vq->signalled_used_valid = true;
1221    old = vq->signalled_used;
1222    new = vq->signalled_used = vq->used_idx;
1223    return !v || vring_need_event(vring_get_used_event(vq), new, old);
1224}
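
/*
 * Worked example (illustrative) of the EVENT_IDX decision above, using
 * vring_need_event(event, new, old), i.e.
 * (uint16_t)(new - event - 1) < (uint16_t)(new - old):
 *
 *   used_event = 5, old = 3, new = 7:
 *     (7 - 5 - 1) = 1      <  (7 - 3) = 4   -> notify
 *   used_event = 9, old = 3, new = 7:
 *     (7 - 9 - 1) = 0xfffd >= 4             -> skip, guest asked for later
 *
 * We only interrupt once the used index moves past the guest-supplied
 * used_event threshold; signalled_used_valid == false forces a notify.
 */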
1225
1226void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
1227{
1228    if (!virtio_should_notify(vdev, vq)) {
1229        return;
1230    }
1231
1232    trace_virtio_notify(vdev, vq);
1233    vdev->isr |= 0x01;
1234    virtio_notify_vector(vdev, vq->vector);
1235}
1236
1237void virtio_notify_config(VirtIODevice *vdev)
1238{
1239    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1240        return;
1241
1242    vdev->isr |= 0x03;
1243    vdev->generation++;
1244    virtio_notify_vector(vdev, vdev->config_vector);
1245}
1246
1247static bool virtio_device_endian_needed(void *opaque)
1248{
1249    VirtIODevice *vdev = opaque;
1250
1251    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1252    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1253        return vdev->device_endian != virtio_default_endian();
1254    }
1255    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
1256    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1257}
1258
1259static bool virtio_64bit_features_needed(void *opaque)
1260{
1261    VirtIODevice *vdev = opaque;
1262
1263    return (vdev->host_features >> 32) != 0;
1264}
1265
1266static bool virtio_virtqueue_needed(void *opaque)
1267{
1268    VirtIODevice *vdev = opaque;
1269
1270    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
1271}
1272
1273static bool virtio_ringsize_needed(void *opaque)
1274{
1275    VirtIODevice *vdev = opaque;
1276    int i;
1277
1278    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1279        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
1280            return true;
1281        }
1282    }
1283    return false;
1284}
1285
1286static bool virtio_extra_state_needed(void *opaque)
1287{
1288    VirtIODevice *vdev = opaque;
1289    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1290    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1291
1292    return k->has_extra_state &&
1293        k->has_extra_state(qbus->parent);
1294}
1295
1296static const VMStateDescription vmstate_virtqueue = {
1297    .name = "virtqueue_state",
1298    .version_id = 1,
1299    .minimum_version_id = 1,
1300    .fields = (VMStateField[]) {
1301        VMSTATE_UINT64(vring.avail, struct VirtQueue),
1302        VMSTATE_UINT64(vring.used, struct VirtQueue),
1303        VMSTATE_END_OF_LIST()
1304    }
1305};
1306
1307static const VMStateDescription vmstate_virtio_virtqueues = {
1308    .name = "virtio/virtqueues",
1309    .version_id = 1,
1310    .minimum_version_id = 1,
1311    .needed = &virtio_virtqueue_needed,
1312    .fields = (VMStateField[]) {
1313        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1314                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
1315        VMSTATE_END_OF_LIST()
1316    }
1317};
1318
1319static const VMStateDescription vmstate_ringsize = {
1320    .name = "ringsize_state",
1321    .version_id = 1,
1322    .minimum_version_id = 1,
1323    .fields = (VMStateField[]) {
1324        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
1325        VMSTATE_END_OF_LIST()
1326    }
1327};
1328
1329static const VMStateDescription vmstate_virtio_ringsize = {
1330    .name = "virtio/ringsize",
1331    .version_id = 1,
1332    .minimum_version_id = 1,
1333    .needed = &virtio_ringsize_needed,
1334    .fields = (VMStateField[]) {
1335        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1336                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
1337        VMSTATE_END_OF_LIST()
1338    }
1339};
1340
1341static int get_extra_state(QEMUFile *f, void *pv, size_t size)
1342{
1343    VirtIODevice *vdev = pv;
1344    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1345    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1346
1347    if (!k->load_extra_state) {
1348        return -1;
1349    } else {
1350        return k->load_extra_state(qbus->parent, f);
1351    }
1352}
1353
1354static void put_extra_state(QEMUFile *f, void *pv, size_t size)
1355{
1356    VirtIODevice *vdev = pv;
1357    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1358    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1359
1360    k->save_extra_state(qbus->parent, f);
1361}
1362
1363static const VMStateInfo vmstate_info_extra_state = {
1364    .name = "virtqueue_extra_state",
1365    .get = get_extra_state,
1366    .put = put_extra_state,
1367};
1368
1369static const VMStateDescription vmstate_virtio_extra_state = {
1370    .name = "virtio/extra_state",
1371    .version_id = 1,
1372    .minimum_version_id = 1,
1373    .needed = &virtio_extra_state_needed,
1374    .fields = (VMStateField[]) {
1375        {
1376            .name         = "extra_state",
1377            .version_id   = 0,
1378            .field_exists = NULL,
1379            .size         = 0,
1380            .info         = &vmstate_info_extra_state,
1381            .flags        = VMS_SINGLE,
1382            .offset       = 0,
1383        },
1384        VMSTATE_END_OF_LIST()
1385    }
1386};
1387
1388static const VMStateDescription vmstate_virtio_device_endian = {
1389    .name = "virtio/device_endian",
1390    .version_id = 1,
1391    .minimum_version_id = 1,
1392    .needed = &virtio_device_endian_needed,
1393    .fields = (VMStateField[]) {
1394        VMSTATE_UINT8(device_endian, VirtIODevice),
1395        VMSTATE_END_OF_LIST()
1396    }
1397};
1398
1399static const VMStateDescription vmstate_virtio_64bit_features = {
1400    .name = "virtio/64bit_features",
1401    .version_id = 1,
1402    .minimum_version_id = 1,
1403    .needed = &virtio_64bit_features_needed,
1404    .fields = (VMStateField[]) {
1405        VMSTATE_UINT64(guest_features, VirtIODevice),
1406        VMSTATE_END_OF_LIST()
1407    }
1408};
1409
1410static const VMStateDescription vmstate_virtio = {
1411    .name = "virtio",
1412    .version_id = 1,
1413    .minimum_version_id = 1,
1414    .minimum_version_id_old = 1,
1415    .fields = (VMStateField[]) {
1416        VMSTATE_END_OF_LIST()
1417    },
1418    .subsections = (const VMStateDescription*[]) {
1419        &vmstate_virtio_device_endian,
1420        &vmstate_virtio_64bit_features,
1421        &vmstate_virtio_virtqueues,
1422        &vmstate_virtio_ringsize,
1423        &vmstate_virtio_extra_state,
1424        NULL
1425    }
1426};
1427
1428void virtio_save(VirtIODevice *vdev, QEMUFile *f)
1429{
1430    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1431    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1432    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1433    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
1434    int i;
1435
1436    if (k->save_config) {
1437        k->save_config(qbus->parent, f);
1438    }
1439
1440    qemu_put_8s(f, &vdev->status);
1441    qemu_put_8s(f, &vdev->isr);
1442    qemu_put_be16s(f, &vdev->queue_sel);
1443    qemu_put_be32s(f, &guest_features_lo);
1444    qemu_put_be32(f, vdev->config_len);
1445    qemu_put_buffer(f, vdev->config, vdev->config_len);
1446
1447    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1448        if (vdev->vq[i].vring.num == 0)
1449            break;
1450    }
1451
1452    qemu_put_be32(f, i);
1453
1454    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1455        if (vdev->vq[i].vring.num == 0)
1456            break;
1457
1458        qemu_put_be32(f, vdev->vq[i].vring.num);
1459        if (k->has_variable_vring_alignment) {
1460            qemu_put_be32(f, vdev->vq[i].vring.align);
1461        }
1462        /* XXX virtio-1 devices */
1463        qemu_put_be64(f, vdev->vq[i].vring.desc);
1464        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
1465        if (k->save_queue) {
1466            k->save_queue(qbus->parent, i, f);
1467        }
1468    }
1469
1470    if (vdc->save != NULL) {
1471        vdc->save(vdev, f);
1472    }
1473
1474    /* Subsections */
1475    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
1476}
1477
1478/* A wrapper for use as a VMState .put function */
1479void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size)
1480{
1481    virtio_save(VIRTIO_DEVICE(opaque), f);
1482}
1483
1484static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
1485{
1486    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1487    bool bad = (val & ~(vdev->host_features)) != 0;
1488
1489    val &= vdev->host_features;
1490    if (k->set_features) {
1491        k->set_features(vdev, val);
1492    }
1493    vdev->guest_features = val;
1494    return bad ? -1 : 0;
1495}
1496
1497int virtio_set_features(VirtIODevice *vdev, uint64_t val)
1498{
 1499    /*
1500     * The driver must not attempt to set features after feature negotiation
1501     * has finished.
1502     */
1503    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
1504        return -EINVAL;
1505    }
1506    return virtio_set_features_nocheck(vdev, val);
1507}
1508
1509int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1510{
1511    int i, ret;
1512    int32_t config_len;
1513    uint32_t num;
1514    uint32_t features;
1515    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1516    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1517    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1518
1519    /*
1520     * We poison the endianness to ensure it does not get used before
1521     * subsections have been loaded.
1522     */
1523    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1524
1525    if (k->load_config) {
1526        ret = k->load_config(qbus->parent, f);
1527        if (ret)
1528            return ret;
1529    }
1530
1531    qemu_get_8s(f, &vdev->status);
1532    qemu_get_8s(f, &vdev->isr);
1533    qemu_get_be16s(f, &vdev->queue_sel);
1534    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1535        return -1;
1536    }
1537    qemu_get_be32s(f, &features);
1538
1539    /*
1540     * Temporarily set guest_features low bits - needed by
1541     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
1542     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
1543     *
1544     * Note: devices should always test host features in future - don't create
1545     * new dependencies like this.
1546     */
1547    vdev->guest_features = features;
1548
1549    config_len = qemu_get_be32(f);
1550
1551    /*
1552     * There are cases where the incoming config can be bigger or smaller
1553     * than what we have; so load what we have space for, and skip
1554     * any excess that's in the stream.
1555     */
1556    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1557
1558    while (config_len > vdev->config_len) {
1559        qemu_get_byte(f);
1560        config_len--;
1561    }
1562
1563    num = qemu_get_be32(f);
1564
1565    if (num > VIRTIO_QUEUE_MAX) {
1566        error_report("Invalid number of virtqueues: 0x%x", num);
1567        return -1;
1568    }
1569
1570    for (i = 0; i < num; i++) {
1571        vdev->vq[i].vring.num = qemu_get_be32(f);
1572        if (k->has_variable_vring_alignment) {
1573            vdev->vq[i].vring.align = qemu_get_be32(f);
1574        }
1575        vdev->vq[i].vring.desc = qemu_get_be64(f);
1576        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1577        vdev->vq[i].signalled_used_valid = false;
1578        vdev->vq[i].notification = true;
1579
1580        if (vdev->vq[i].vring.desc) {
1581            /* XXX virtio-1 devices */
1582            virtio_queue_update_rings(vdev, i);
1583        } else if (vdev->vq[i].last_avail_idx) {
1584            error_report("VQ %d address 0x0 "
1585                         "inconsistent with Host index 0x%x",
1586                         i, vdev->vq[i].last_avail_idx);
 1587            return -1;
1588        }
1589        if (k->load_queue) {
1590            ret = k->load_queue(qbus->parent, i, f);
1591            if (ret)
1592                return ret;
1593        }
1594    }
1595
1596    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1597
1598    if (vdc->load != NULL) {
1599        ret = vdc->load(vdev, f, version_id);
1600        if (ret) {
1601            return ret;
1602        }
1603    }
1604
1605    /* Subsections */
1606    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1607    if (ret) {
1608        return ret;
1609    }
1610
1611    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1612        vdev->device_endian = virtio_default_endian();
1613    }
1614
1615    if (virtio_64bit_features_needed(vdev)) {
1616        /*
1617         * Subsection load filled vdev->guest_features.  Run them
1618         * through virtio_set_features to sanity-check them against
1619         * host_features.
1620         */
1621        uint64_t features64 = vdev->guest_features;
1622        if (virtio_set_features_nocheck(vdev, features64) < 0) {
1623            error_report("Features 0x%" PRIx64 " unsupported. "
1624                         "Allowed features: 0x%" PRIx64,
1625                         features64, vdev->host_features);
1626            return -1;
1627        }
1628    } else {
1629        if (virtio_set_features_nocheck(vdev, features) < 0) {
1630            error_report("Features 0x%x unsupported. "
1631                         "Allowed features: 0x%" PRIx64,
1632                         features, vdev->host_features);
1633            return -1;
1634        }
1635    }
1636
1637    for (i = 0; i < num; i++) {
1638        if (vdev->vq[i].vring.desc) {
1639            uint16_t nheads;
1640            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1641            /* Check it isn't doing strange things with descriptor numbers. */
1642            if (nheads > vdev->vq[i].vring.num) {
1643                error_report("VQ %d size 0x%x Guest index 0x%x "
1644                             "inconsistent with Host index 0x%x: delta 0x%x",
1645                             i, vdev->vq[i].vring.num,
1646                             vring_avail_idx(&vdev->vq[i]),
1647                             vdev->vq[i].last_avail_idx, nheads);
1648                return -1;
1649            }
1650            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
1651            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
1652
1653            /*
1654             * Some devices migrate VirtQueueElements that have been popped
1655             * from the avail ring but not yet returned to the used ring.
1656             */
1657            vdev->vq[i].inuse = vdev->vq[i].last_avail_idx -
1658                                vdev->vq[i].used_idx;
1659            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
1660                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
1661                             "used_idx 0x%x",
1662                             i, vdev->vq[i].vring.num,
1663                             vdev->vq[i].last_avail_idx,
1664                             vdev->vq[i].used_idx);
1665                return -1;
1666            }
1667        }
1668    }
1669
1670    return 0;
1671}
1672
1673void virtio_cleanup(VirtIODevice *vdev)
1674{
1675    qemu_del_vm_change_state_handler(vdev->vmstate);
1676    g_free(vdev->config);
1677    g_free(vdev->vq);
1678    g_free(vdev->vector_queues);
1679}
1680
1681static void virtio_vmstate_change(void *opaque, int running, RunState state)
1682{
1683    VirtIODevice *vdev = opaque;
1684    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1685    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1686    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1687    vdev->vm_running = running;
1688
1689    if (backend_run) {
1690        virtio_set_status(vdev, vdev->status);
1691    }
1692
1693    if (k->vmstate_change) {
1694        k->vmstate_change(qbus->parent, backend_run);
1695    }
1696
1697    if (!backend_run) {
1698        virtio_set_status(vdev, vdev->status);
1699    }
1700}
1701
1702void virtio_instance_init_common(Object *proxy_obj, void *data,
1703                                 size_t vdev_size, const char *vdev_name)
1704{
1705    DeviceState *vdev = data;
1706
1707    object_initialize(vdev, vdev_size, vdev_name);
1708    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1709    object_unref(OBJECT(vdev));
1710    qdev_alias_all_properties(vdev, proxy_obj);
1711}
1712
1713void virtio_init(VirtIODevice *vdev, const char *name,
1714                 uint16_t device_id, size_t config_size)
1715{
1716    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1717    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1718    int i;
1719    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
1720
1721    if (nvectors) {
1722        vdev->vector_queues =
1723            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
1724    }
1725
1726    vdev->device_id = device_id;
1727    vdev->status = 0;
1728    vdev->isr = 0;
1729    vdev->queue_sel = 0;
1730    vdev->config_vector = VIRTIO_NO_VECTOR;
1731    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
1732    vdev->vm_running = runstate_is_running();
1733    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1734        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1735        vdev->vq[i].vdev = vdev;
1736        vdev->vq[i].queue_index = i;
1737    }
1738
1739    vdev->name = name;
1740    vdev->config_len = config_size;
1741    if (vdev->config_len) {
1742        vdev->config = g_malloc0(config_size);
1743    } else {
1744        vdev->config = NULL;
1745    }
1746    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1747                                                     vdev);
1748    vdev->device_endian = virtio_default_endian();
1749    vdev->use_guest_notifier_mask = true;
1750}
1751
1752hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1753{
1754    return vdev->vq[n].vring.desc;
1755}
1756
1757hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1758{
1759    return vdev->vq[n].vring.avail;
1760}
1761
1762hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1763{
1764    return vdev->vq[n].vring.used;
1765}
1766
1767hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1768{
1769    return vdev->vq[n].vring.desc;
1770}
1771
1772hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1773{
1774    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1775}
1776
1777hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1778{
1779    return offsetof(VRingAvail, ring) +
1780        sizeof(uint16_t) * vdev->vq[n].vring.num;
1781}
1782
1783hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1784{
1785    return offsetof(VRingUsed, ring) +
1786        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1787}
1788
1789hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1790{
1791    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1792            virtio_queue_get_used_size(vdev, n);
1793}
1794
1795uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1796{
1797    return vdev->vq[n].last_avail_idx;
1798}
1799
1800void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1801{
1802    vdev->vq[n].last_avail_idx = idx;
1803    vdev->vq[n].shadow_avail_idx = idx;
1804}
1805
1806void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1807{
1808    vdev->vq[n].signalled_used_valid = false;
1809}
1810
1811VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1812{
1813    return vdev->vq + n;
1814}
1815
1816uint16_t virtio_get_queue_index(VirtQueue *vq)
1817{
1818    return vq->queue_index;
1819}
1820
1821static void virtio_queue_guest_notifier_read(EventNotifier *n)
1822{
1823    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1824    if (event_notifier_test_and_clear(n)) {
1825        virtio_irq(vq);
1826    }
1827}
1828
1829void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1830                                                bool with_irqfd)
1831{
1832    if (assign && !with_irqfd) {
1833        event_notifier_set_handler(&vq->guest_notifier, false,
1834                                   virtio_queue_guest_notifier_read);
1835    } else {
1836        event_notifier_set_handler(&vq->guest_notifier, false, NULL);
1837    }
1838    if (!assign) {
1839        /* Test and clear notifier before closing it,
1840         * in case poll callback didn't have time to run. */
1841        virtio_queue_guest_notifier_read(&vq->guest_notifier);
1842    }
1843}
1844
1845EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1846{
1847    return &vq->guest_notifier;
1848}
1849
1850static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
1851{
1852    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1853    if (event_notifier_test_and_clear(n)) {
1854        virtio_queue_notify_aio_vq(vq);
1855    }
1856}
1857
1858void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
1859                                                VirtIOHandleOutput handle_output)
1860{
1861    if (handle_output) {
1862        vq->handle_aio_output = handle_output;
1863        aio_set_event_notifier(ctx, &vq->host_notifier, true,
1864                               virtio_queue_host_notifier_aio_read);
1865    } else {
1866        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
 1867        /* Test and clear notifier after disabling the event,
1868         * in case poll callback didn't have time to run. */
1869        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
1870        vq->handle_aio_output = NULL;
1871    }
1872}
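
/*
 * Usage sketch (illustrative): dataplane-style devices move queue processing
 * into an IOThread by pointing the host notifier at that thread's
 * AioContext, and detach again by passing a NULL handler.  ctx would come
 * from the device's configured IOThread; my_handle_output is hypothetical:
 *
 *   aio_context_acquire(ctx);
 *   virtio_queue_aio_set_host_notifier_handler(vq, ctx, my_handle_output);
 *   aio_context_release(ctx);
 */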
1873
1874static void virtio_queue_host_notifier_read(EventNotifier *n)
1875{
1876    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1877    if (event_notifier_test_and_clear(n)) {
1878        virtio_queue_notify_vq(vq);
1879    }
1880}
1881
1882void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1883                                               bool set_handler)
1884{
1885    AioContext *ctx = qemu_get_aio_context();
1886    if (assign && set_handler) {
1887        if (vq->use_aio) {
1888            aio_set_event_notifier(ctx, &vq->host_notifier, true,
1889                                   virtio_queue_host_notifier_read);
1890        } else {
1891            event_notifier_set_handler(&vq->host_notifier, true,
1892                                       virtio_queue_host_notifier_read);
1893        }
1894    } else {
1895        if (vq->use_aio) {
1896            aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
1897        } else {
1898            event_notifier_set_handler(&vq->host_notifier, true, NULL);
1899        }
1900    }
1901    if (!assign) {
 1902        /* Test and clear notifier after disabling the event,
1903         * in case poll callback didn't have time to run. */
1904        virtio_queue_host_notifier_read(&vq->host_notifier);
1905    }
1906}
1907
1908EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1909{
1910    return &vq->host_notifier;
1911}
1912
1913void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1914{
1915    g_free(vdev->bus_name);
1916    vdev->bus_name = g_strdup(bus_name);
1917}
1918
1919static void virtio_device_realize(DeviceState *dev, Error **errp)
1920{
1921    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1922    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1923    Error *err = NULL;
1924
1925    if (vdc->realize != NULL) {
1926        vdc->realize(dev, &err);
1927        if (err != NULL) {
1928            error_propagate(errp, err);
1929            return;
1930        }
1931    }
1932
1933    virtio_bus_device_plugged(vdev, &err);
1934    if (err != NULL) {
1935        error_propagate(errp, err);
1936        return;
1937    }
1938}
1939
1940static void virtio_device_unrealize(DeviceState *dev, Error **errp)
1941{
1942    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1943    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1944    Error *err = NULL;
1945
1946    virtio_bus_device_unplugged(vdev);
1947
1948    if (vdc->unrealize != NULL) {
1949        vdc->unrealize(dev, &err);
1950        if (err != NULL) {
1951            error_propagate(errp, err);
1952            return;
1953        }
1954    }
1955
1956    g_free(vdev->bus_name);
1957    vdev->bus_name = NULL;
1958}
1959
1960static Property virtio_properties[] = {
1961    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
1962    DEFINE_PROP_END_OF_LIST(),
1963};
1964
1965static void virtio_device_class_init(ObjectClass *klass, void *data)
1966{
1967    /* Set the default value here. */
1968    DeviceClass *dc = DEVICE_CLASS(klass);
1969
1970    dc->realize = virtio_device_realize;
1971    dc->unrealize = virtio_device_unrealize;
1972    dc->bus_type = TYPE_VIRTIO_BUS;
1973    dc->props = virtio_properties;
1974}
1975
1976static const TypeInfo virtio_device_info = {
1977    .name = TYPE_VIRTIO_DEVICE,
1978    .parent = TYPE_DEVICE,
1979    .instance_size = sizeof(VirtIODevice),
1980    .class_init = virtio_device_class_init,
1981    .abstract = true,
1982    .class_size = sizeof(VirtioDeviceClass),
1983};
1984
1985static void virtio_register_types(void)
1986{
1987    type_register_static(&virtio_device_info);
1988}
1989
1990type_init(virtio_register_types)
1991