qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "cpu.h"
  17#include "trace.h"
  18#include "qemu/error-report.h"
  19#include "qemu/log.h"
  20#include "qemu/main-loop.h"
  21#include "qemu/module.h"
  22#include "hw/virtio/virtio.h"
  23#include "migration/qemu-file-types.h"
  24#include "qemu/atomic.h"
  25#include "hw/virtio/virtio-bus.h"
  26#include "hw/qdev-properties.h"
  27#include "hw/virtio/virtio-access.h"
  28#include "sysemu/dma.h"
  29#include "sysemu/runstate.h"
  30#include "standard-headers/linux/virtio_ids.h"
  31
  32/*
  33 * The alignment to use between consumer and producer parts of vring.
  34 * x86 pagesize again. This is the default, used by transports like PCI
  35 * which don't provide a means for the guest to tell the host the alignment.
  36 */
  37#define VIRTIO_PCI_VRING_ALIGN         4096
  38
  39typedef struct VRingDesc
  40{
  41    uint64_t addr;
  42    uint32_t len;
  43    uint16_t flags;
  44    uint16_t next;
  45} VRingDesc;
  46
  47typedef struct VRingPackedDesc {
  48    uint64_t addr;
  49    uint32_t len;
  50    uint16_t id;
  51    uint16_t flags;
  52} VRingPackedDesc;
  53
  54typedef struct VRingAvail
  55{
  56    uint16_t flags;
  57    uint16_t idx;
  58    uint16_t ring[];
  59} VRingAvail;
  60
  61typedef struct VRingUsedElem
  62{
  63    uint32_t id;
  64    uint32_t len;
  65} VRingUsedElem;
  66
  67typedef struct VRingUsed
  68{
  69    uint16_t flags;
  70    uint16_t idx;
  71    VRingUsedElem ring[];
  72} VRingUsed;
  73
  74typedef struct VRingMemoryRegionCaches {
  75    struct rcu_head rcu;
  76    MemoryRegionCache desc;
  77    MemoryRegionCache avail;
  78    MemoryRegionCache used;
  79} VRingMemoryRegionCaches;
  80
  81typedef struct VRing
  82{
  83    unsigned int num;
  84    unsigned int num_default;
  85    unsigned int align;
  86    hwaddr desc;
  87    hwaddr avail;
  88    hwaddr used;
  89    VRingMemoryRegionCaches *caches;
  90} VRing;
  91
  92typedef struct VRingPackedDescEvent {
  93    uint16_t off_wrap;
  94    uint16_t flags;
   95} VRingPackedDescEvent;
  96
  97struct VirtQueue
  98{
  99    VRing vring;
 100    VirtQueueElement *used_elems;
 101
 102    /* Next head to pop */
 103    uint16_t last_avail_idx;
 104    bool last_avail_wrap_counter;
 105
 106    /* Last avail_idx read from VQ. */
 107    uint16_t shadow_avail_idx;
 108    bool shadow_avail_wrap_counter;
 109
 110    uint16_t used_idx;
 111    bool used_wrap_counter;
 112
 113    /* Last used index value we have signalled on */
 114    uint16_t signalled_used;
 115
  116    /* Whether the signalled_used value above is valid */
 117    bool signalled_used_valid;
 118
 119    /* Notification enabled? */
 120    bool notification;
 121
 122    uint16_t queue_index;
 123
 124    unsigned int inuse;
 125
 126    uint16_t vector;
 127    VirtIOHandleOutput handle_output;
 128    VirtIOHandleAIOOutput handle_aio_output;
 129    VirtIODevice *vdev;
 130    EventNotifier guest_notifier;
 131    EventNotifier host_notifier;
 132    bool host_notifier_enabled;
 133    QLIST_ENTRY(VirtQueue) node;
 134};
 135
 136static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
 137{
 138    if (!caches) {
 139        return;
 140    }
 141
 142    address_space_cache_destroy(&caches->desc);
 143    address_space_cache_destroy(&caches->avail);
 144    address_space_cache_destroy(&caches->used);
 145    g_free(caches);
 146}
 147
 148static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
 149{
 150    VRingMemoryRegionCaches *caches;
 151
 152    caches = qatomic_read(&vq->vring.caches);
 153    qatomic_rcu_set(&vq->vring.caches, NULL);
 154    if (caches) {
 155        call_rcu(caches, virtio_free_region_cache, rcu);
 156    }
 157}
 158
 159static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 160{
 161    VirtQueue *vq = &vdev->vq[n];
 162    VRingMemoryRegionCaches *old = vq->vring.caches;
 163    VRingMemoryRegionCaches *new = NULL;
 164    hwaddr addr, size;
 165    int64_t len;
 166    bool packed;
 167
 168
 169    addr = vq->vring.desc;
 170    if (!addr) {
 171        goto out_no_cache;
 172    }
 173    new = g_new0(VRingMemoryRegionCaches, 1);
 174    size = virtio_queue_get_desc_size(vdev, n);
 175    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
 176                                   true : false;
 177    len = address_space_cache_init(&new->desc, vdev->dma_as,
 178                                   addr, size, packed);
 179    if (len < size) {
 180        virtio_error(vdev, "Cannot map desc");
 181        goto err_desc;
 182    }
 183
 184    size = virtio_queue_get_used_size(vdev, n);
 185    len = address_space_cache_init(&new->used, vdev->dma_as,
 186                                   vq->vring.used, size, true);
 187    if (len < size) {
 188        virtio_error(vdev, "Cannot map used");
 189        goto err_used;
 190    }
 191
 192    size = virtio_queue_get_avail_size(vdev, n);
 193    len = address_space_cache_init(&new->avail, vdev->dma_as,
 194                                   vq->vring.avail, size, false);
 195    if (len < size) {
 196        virtio_error(vdev, "Cannot map avail");
 197        goto err_avail;
 198    }
 199
 200    qatomic_rcu_set(&vq->vring.caches, new);
 201    if (old) {
 202        call_rcu(old, virtio_free_region_cache, rcu);
 203    }
 204    return;
 205
 206err_avail:
 207    address_space_cache_destroy(&new->avail);
 208err_used:
 209    address_space_cache_destroy(&new->used);
 210err_desc:
 211    address_space_cache_destroy(&new->desc);
 212out_no_cache:
 213    g_free(new);
 214    virtio_virtqueue_reset_region_cache(vq);
 215}
 216
 217/* virt queue functions */
 218void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 219{
 220    VRing *vring = &vdev->vq[n].vring;
 221
 222    if (!vring->num || !vring->desc || !vring->align) {
 223        /* not yet setup -> nothing to do */
 224        return;
 225    }
 226    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 227    vring->used = vring_align(vring->avail +
 228                              offsetof(VRingAvail, ring[vring->num]),
 229                              vring->align);
 230    virtio_init_region_cache(vdev, n);
 231}
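
/*
 * A worked example of the split-ring layout computed above, assuming a
 * queue of 256 descriptors and the default 4096-byte alignment:
 *
 *   desc  starts at vring.desc + 0     (256 * 16-byte descriptors = 4096 bytes)
 *   avail starts at vring.desc + 4096  (4 bytes of header + 256 * 2 bytes = 516)
 *   used  starts at vring.desc + 8192  (4612 rounded up to the 4096 alignment)
 */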
 232
 233/* Called within rcu_read_lock().  */
 234static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 235                                  MemoryRegionCache *cache, int i)
 236{
 237    address_space_read_cached(cache, i * sizeof(VRingDesc),
 238                              desc, sizeof(VRingDesc));
 239    virtio_tswap64s(vdev, &desc->addr);
 240    virtio_tswap32s(vdev, &desc->len);
 241    virtio_tswap16s(vdev, &desc->flags);
 242    virtio_tswap16s(vdev, &desc->next);
 243}
 244
 245static void vring_packed_event_read(VirtIODevice *vdev,
 246                                    MemoryRegionCache *cache,
 247                                    VRingPackedDescEvent *e)
 248{
 249    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
 250    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 251
 252    address_space_read_cached(cache, off_flags, &e->flags,
 253                              sizeof(e->flags));
 254    /* Make sure flags is seen before off_wrap */
 255    smp_rmb();
 256    address_space_read_cached(cache, off_off, &e->off_wrap,
 257                              sizeof(e->off_wrap));
 258    virtio_tswap16s(vdev, &e->off_wrap);
 259    virtio_tswap16s(vdev, &e->flags);
 260}
 261
 262static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 263                                        MemoryRegionCache *cache,
 264                                        uint16_t off_wrap)
 265{
 266    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 267
 268    virtio_tswap16s(vdev, &off_wrap);
 269    address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
 270    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 271}
 272
 273static void vring_packed_flags_write(VirtIODevice *vdev,
 274                                     MemoryRegionCache *cache, uint16_t flags)
 275{
 276    hwaddr off = offsetof(VRingPackedDescEvent, flags);
 277
 278    virtio_tswap16s(vdev, &flags);
 279    address_space_write_cached(cache, off, &flags, sizeof(flags));
 280    address_space_cache_invalidate(cache, off, sizeof(flags));
 281}
 282
 283/* Called within rcu_read_lock().  */
 284static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 285{
 286    return qatomic_rcu_read(&vq->vring.caches);
 287}
 288
 289/* Called within rcu_read_lock().  */
 290static inline uint16_t vring_avail_flags(VirtQueue *vq)
 291{
 292    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 293    hwaddr pa = offsetof(VRingAvail, flags);
 294
 295    if (!caches) {
 296        return 0;
 297    }
 298
 299    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 300}
 301
 302/* Called within rcu_read_lock().  */
 303static inline uint16_t vring_avail_idx(VirtQueue *vq)
 304{
 305    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 306    hwaddr pa = offsetof(VRingAvail, idx);
 307
 308    if (!caches) {
 309        return 0;
 310    }
 311
 312    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 313    return vq->shadow_avail_idx;
 314}
 315
 316/* Called within rcu_read_lock().  */
 317static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 318{
 319    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 320    hwaddr pa = offsetof(VRingAvail, ring[i]);
 321
 322    if (!caches) {
 323        return 0;
 324    }
 325
 326    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 327}
 328
 329/* Called within rcu_read_lock().  */
 330static inline uint16_t vring_get_used_event(VirtQueue *vq)
 331{
 332    return vring_avail_ring(vq, vq->vring.num);
 333}
 334
 335/* Called within rcu_read_lock().  */
 336static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 337                                    int i)
 338{
 339    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 340    hwaddr pa = offsetof(VRingUsed, ring[i]);
 341
 342    if (!caches) {
 343        return;
 344    }
 345
 346    virtio_tswap32s(vq->vdev, &uelem->id);
 347    virtio_tswap32s(vq->vdev, &uelem->len);
 348    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
 349    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
 350}
 351
 352/* Called within rcu_read_lock().  */
 353static uint16_t vring_used_idx(VirtQueue *vq)
 354{
 355    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 356    hwaddr pa = offsetof(VRingUsed, idx);
 357
 358    if (!caches) {
 359        return 0;
 360    }
 361
 362    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 363}
 364
 365/* Called within rcu_read_lock().  */
 366static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 367{
 368    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 369    hwaddr pa = offsetof(VRingUsed, idx);
 370
 371    if (caches) {
 372        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 373        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 374    }
 375
 376    vq->used_idx = val;
 377}
 378
 379/* Called within rcu_read_lock().  */
 380static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 381{
 382    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 383    VirtIODevice *vdev = vq->vdev;
 384    hwaddr pa = offsetof(VRingUsed, flags);
 385    uint16_t flags;
 386
 387    if (!caches) {
 388        return;
 389    }
 390
 391    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 392    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
 393    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 394}
 395
 396/* Called within rcu_read_lock().  */
 397static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 398{
 399    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 400    VirtIODevice *vdev = vq->vdev;
 401    hwaddr pa = offsetof(VRingUsed, flags);
 402    uint16_t flags;
 403
 404    if (!caches) {
 405        return;
 406    }
 407
 408    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 409    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
 410    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 411}
 412
 413/* Called within rcu_read_lock().  */
 414static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 415{
 416    VRingMemoryRegionCaches *caches;
 417    hwaddr pa;
 418    if (!vq->notification) {
 419        return;
 420    }
 421
 422    caches = vring_get_region_caches(vq);
 423    if (!caches) {
 424        return;
 425    }
 426
 427    pa = offsetof(VRingUsed, ring[vq->vring.num]);
 428    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 429    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 430}
 431
 432static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
 433{
 434    RCU_READ_LOCK_GUARD();
 435
 436    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 437        vring_set_avail_event(vq, vring_avail_idx(vq));
 438    } else if (enable) {
 439        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 440    } else {
 441        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 442    }
 443    if (enable) {
 444        /* Expose avail event/used flags before caller checks the avail idx. */
 445        smp_mb();
 446    }
 447}
 448
 449static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
 450{
 451    uint16_t off_wrap;
 452    VRingPackedDescEvent e;
 453    VRingMemoryRegionCaches *caches;
 454
 455    RCU_READ_LOCK_GUARD();
 456    caches = vring_get_region_caches(vq);
 457    if (!caches) {
 458        return;
 459    }
 460
 461    vring_packed_event_read(vq->vdev, &caches->used, &e);
 462
 463    if (!enable) {
 464        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
 465    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 466        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
 467        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
  468        /* Make sure off_wrap is written before flags */
 469        smp_wmb();
 470        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
 471    } else {
 472        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
 473    }
 474
 475    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
 476    if (enable) {
 477        /* Expose avail event/used flags before caller checks the avail idx. */
 478        smp_mb();
 479    }
 480}
 481
 482bool virtio_queue_get_notification(VirtQueue *vq)
 483{
 484    return vq->notification;
 485}
 486
 487void virtio_queue_set_notification(VirtQueue *vq, int enable)
 488{
 489    vq->notification = enable;
 490
 491    if (!vq->vring.desc) {
 492        return;
 493    }
 494
 495    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 496        virtio_queue_packed_set_notification(vq, enable);
 497    } else {
 498        virtio_queue_split_set_notification(vq, enable);
 499    }
 500}
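
/*
 * Illustrative sketch only (the handler name below is assumed, and
 * virtio_notify()/g_free() come from elsewhere in QEMU): device code
 * typically disables notifications while it drains the queue, then
 * re-enables them and re-checks for buffers the guest added in the
 * meantime.  virtio_queue_set_notification(vq, 1) issues the smp_mb()
 * in the helpers above, which is what makes the final emptiness check safe.
 */
#if 0
static void example_drain_queue(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    do {
        virtio_queue_set_notification(vq, 0);
        while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
            /* ... process elem->out_sg, fill elem->in_sg ... */
            virtqueue_push(vq, elem, 0);
            g_free(elem);
        }
        virtio_queue_set_notification(vq, 1);
    } while (!virtio_queue_empty(vq));
    virtio_notify(vdev, vq);
}
#endif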
 501
 502int virtio_queue_ready(VirtQueue *vq)
 503{
 504    return vq->vring.avail != 0;
 505}
 506
 507static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 508                                         uint16_t *flags,
 509                                         MemoryRegionCache *cache,
 510                                         int i)
 511{
 512    address_space_read_cached(cache,
 513                              i * sizeof(VRingPackedDesc) +
 514                              offsetof(VRingPackedDesc, flags),
 515                              flags, sizeof(*flags));
 516    virtio_tswap16s(vdev, flags);
 517}
 518
 519static void vring_packed_desc_read(VirtIODevice *vdev,
 520                                   VRingPackedDesc *desc,
 521                                   MemoryRegionCache *cache,
 522                                   int i, bool strict_order)
 523{
 524    hwaddr off = i * sizeof(VRingPackedDesc);
 525
 526    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
 527
 528    if (strict_order) {
  529        /* Make sure flags are read before the rest of the fields. */
 530        smp_rmb();
 531    }
 532
 533    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
 534                              &desc->addr, sizeof(desc->addr));
 535    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
 536                              &desc->id, sizeof(desc->id));
 537    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
 538                              &desc->len, sizeof(desc->len));
 539    virtio_tswap64s(vdev, &desc->addr);
 540    virtio_tswap16s(vdev, &desc->id);
 541    virtio_tswap32s(vdev, &desc->len);
 542}
 543
 544static void vring_packed_desc_write_data(VirtIODevice *vdev,
 545                                         VRingPackedDesc *desc,
 546                                         MemoryRegionCache *cache,
 547                                         int i)
 548{
 549    hwaddr off_id = i * sizeof(VRingPackedDesc) +
 550                    offsetof(VRingPackedDesc, id);
 551    hwaddr off_len = i * sizeof(VRingPackedDesc) +
 552                    offsetof(VRingPackedDesc, len);
 553
 554    virtio_tswap32s(vdev, &desc->len);
 555    virtio_tswap16s(vdev, &desc->id);
 556    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
 557    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
 558    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
 559    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
 560}
 561
 562static void vring_packed_desc_write_flags(VirtIODevice *vdev,
 563                                          VRingPackedDesc *desc,
 564                                          MemoryRegionCache *cache,
 565                                          int i)
 566{
 567    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 568
 569    virtio_tswap16s(vdev, &desc->flags);
 570    address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
 571    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
 572}
 573
 574static void vring_packed_desc_write(VirtIODevice *vdev,
 575                                    VRingPackedDesc *desc,
 576                                    MemoryRegionCache *cache,
 577                                    int i, bool strict_order)
 578{
 579    vring_packed_desc_write_data(vdev, desc, cache, i);
 580    if (strict_order) {
  581        /* Make sure data is written before flags. */
 582        smp_wmb();
 583    }
 584    vring_packed_desc_write_flags(vdev, desc, cache, i);
 585}
 586
 587static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
 588{
 589    bool avail, used;
 590
 591    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
 592    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
 593    return (avail != used) && (avail == wrap_counter);
 594}
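
/*
 * A short illustration of the rule above, assuming the ring side doing the
 * check currently has wrap_counter == 1:
 *
 *   AVAIL=1, USED=0 -> avail != used and avail == wrap -> available
 *   AVAIL=1, USED=1 -> avail == used                   -> already used
 *   AVAIL=0, USED=1 -> avail != used but avail != wrap -> left over from the
 *                      previous pass around the ring, not yet available
 */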
 595
 596/* Fetch avail_idx from VQ memory only when we really need to know if
 597 * guest has added some buffers.
 598 * Called within rcu_read_lock().  */
 599static int virtio_queue_empty_rcu(VirtQueue *vq)
 600{
 601    if (virtio_device_disabled(vq->vdev)) {
 602        return 1;
 603    }
 604
 605    if (unlikely(!vq->vring.avail)) {
 606        return 1;
 607    }
 608
 609    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 610        return 0;
 611    }
 612
 613    return vring_avail_idx(vq) == vq->last_avail_idx;
 614}
 615
 616static int virtio_queue_split_empty(VirtQueue *vq)
 617{
 618    bool empty;
 619
 620    if (virtio_device_disabled(vq->vdev)) {
 621        return 1;
 622    }
 623
 624    if (unlikely(!vq->vring.avail)) {
 625        return 1;
 626    }
 627
 628    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 629        return 0;
 630    }
 631
 632    RCU_READ_LOCK_GUARD();
 633    empty = vring_avail_idx(vq) == vq->last_avail_idx;
 634    return empty;
 635}
 636
 637static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
 638{
 639    struct VRingPackedDesc desc;
 640    VRingMemoryRegionCaches *cache;
 641
 642    if (unlikely(!vq->vring.desc)) {
 643        return 1;
 644    }
 645
 646    cache = vring_get_region_caches(vq);
 647    if (!cache) {
 648        return 1;
 649    }
 650
 651    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
 652                                 vq->last_avail_idx);
 653
 654    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
 655}
 656
 657static int virtio_queue_packed_empty(VirtQueue *vq)
 658{
 659    RCU_READ_LOCK_GUARD();
 660    return virtio_queue_packed_empty_rcu(vq);
 661}
 662
 663int virtio_queue_empty(VirtQueue *vq)
 664{
 665    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 666        return virtio_queue_packed_empty(vq);
 667    } else {
 668        return virtio_queue_split_empty(vq);
 669    }
 670}
 671
 672static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
 673                               unsigned int len)
 674{
 675    AddressSpace *dma_as = vq->vdev->dma_as;
 676    unsigned int offset;
 677    int i;
 678
 679    offset = 0;
 680    for (i = 0; i < elem->in_num; i++) {
 681        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 682
 683        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
 684                         elem->in_sg[i].iov_len,
 685                         DMA_DIRECTION_FROM_DEVICE, size);
 686
 687        offset += size;
 688    }
 689
 690    for (i = 0; i < elem->out_num; i++)
 691        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
 692                         elem->out_sg[i].iov_len,
 693                         DMA_DIRECTION_TO_DEVICE,
 694                         elem->out_sg[i].iov_len);
 695}
 696
 697/* virtqueue_detach_element:
 698 * @vq: The #VirtQueue
 699 * @elem: The #VirtQueueElement
 700 * @len: number of bytes written
 701 *
 702 * Detach the element from the virtqueue.  This function is suitable for device
 703 * reset or other situations where a #VirtQueueElement is simply freed and will
 704 * not be pushed or discarded.
 705 */
 706void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
 707                              unsigned int len)
 708{
 709    vq->inuse -= elem->ndescs;
 710    virtqueue_unmap_sg(vq, elem, len);
 711}
 712
 713static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
 714{
 715    vq->last_avail_idx -= num;
 716}
 717
 718static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
 719{
 720    if (vq->last_avail_idx < num) {
 721        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
 722        vq->last_avail_wrap_counter ^= 1;
 723    } else {
 724        vq->last_avail_idx -= num;
 725    }
 726}
 727
 728/* virtqueue_unpop:
 729 * @vq: The #VirtQueue
 730 * @elem: The #VirtQueueElement
 731 * @len: number of bytes written
 732 *
 733 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 734 * call to virtqueue_pop() will refetch the element.
 735 */
 736void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
 737                     unsigned int len)
 738{
 739
 740    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 741        virtqueue_packed_rewind(vq, 1);
 742    } else {
 743        virtqueue_split_rewind(vq, 1);
 744    }
 745
 746    virtqueue_detach_element(vq, elem, len);
 747}
 748
 749/* virtqueue_rewind:
 750 * @vq: The #VirtQueue
 751 * @num: Number of elements to push back
 752 *
 753 * Pretend that elements weren't popped from the virtqueue.  The next
 754 * virtqueue_pop() will refetch the oldest element.
 755 *
 756 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 757 *
 758 * Returns: true on success, false if @num is greater than the number of in use
 759 * elements.
 760 */
 761bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 762{
 763    if (num > vq->inuse) {
 764        return false;
 765    }
 766
 767    vq->inuse -= num;
 768    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 769        virtqueue_packed_rewind(vq, num);
 770    } else {
 771        virtqueue_split_rewind(vq, num);
 772    }
 773    return true;
 774}
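
/*
 * Illustrative sketch only (the backend_busy condition is assumed): a device
 * that pops an element but cannot make progress can hand it back so the next
 * virtqueue_pop() refetches it.
 */
#if 0
    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (elem && backend_busy) {
        virtqueue_unpop(vq, elem, 0);
        g_free(elem);
        return;
    }
#endif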
 775
 776static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 777                    unsigned int len, unsigned int idx)
 778{
 779    VRingUsedElem uelem;
 780
 781    if (unlikely(!vq->vring.used)) {
 782        return;
 783    }
 784
 785    idx = (idx + vq->used_idx) % vq->vring.num;
 786
 787    uelem.id = elem->index;
 788    uelem.len = len;
 789    vring_used_write(vq, &uelem, idx);
 790}
 791
 792static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
 793                                  unsigned int len, unsigned int idx)
 794{
 795    vq->used_elems[idx].index = elem->index;
 796    vq->used_elems[idx].len = len;
 797    vq->used_elems[idx].ndescs = elem->ndescs;
 798}
 799
 800static void virtqueue_packed_fill_desc(VirtQueue *vq,
 801                                       const VirtQueueElement *elem,
 802                                       unsigned int idx,
 803                                       bool strict_order)
 804{
 805    uint16_t head;
 806    VRingMemoryRegionCaches *caches;
 807    VRingPackedDesc desc = {
 808        .id = elem->index,
 809        .len = elem->len,
 810    };
 811    bool wrap_counter = vq->used_wrap_counter;
 812
 813    if (unlikely(!vq->vring.desc)) {
 814        return;
 815    }
 816
 817    head = vq->used_idx + idx;
 818    if (head >= vq->vring.num) {
 819        head -= vq->vring.num;
 820        wrap_counter ^= 1;
 821    }
 822    if (wrap_counter) {
 823        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
 824        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
 825    } else {
 826        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
 827        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
 828    }
 829
 830    caches = vring_get_region_caches(vq);
 831    if (!caches) {
 832        return;
 833    }
 834
 835    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
 836}
 837
 838/* Called within rcu_read_lock().  */
 839void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 840                    unsigned int len, unsigned int idx)
 841{
 842    trace_virtqueue_fill(vq, elem, len, idx);
 843
 844    virtqueue_unmap_sg(vq, elem, len);
 845
 846    if (virtio_device_disabled(vq->vdev)) {
 847        return;
 848    }
 849
 850    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 851        virtqueue_packed_fill(vq, elem, len, idx);
 852    } else {
 853        virtqueue_split_fill(vq, elem, len, idx);
 854    }
 855}
 856
 857/* Called within rcu_read_lock().  */
 858static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
 859{
 860    uint16_t old, new;
 861
 862    if (unlikely(!vq->vring.used)) {
 863        return;
 864    }
 865
 866    /* Make sure buffer is written before we update index. */
 867    smp_wmb();
 868    trace_virtqueue_flush(vq, count);
 869    old = vq->used_idx;
 870    new = old + count;
 871    vring_used_idx_set(vq, new);
 872    vq->inuse -= count;
 873    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
 874        vq->signalled_used_valid = false;
 875}
 876
 877static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
 878{
 879    unsigned int i, ndescs = 0;
 880
 881    if (unlikely(!vq->vring.desc)) {
 882        return;
 883    }
 884
 885    for (i = 1; i < count; i++) {
 886        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
 887        ndescs += vq->used_elems[i].ndescs;
 888    }
 889    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
 890    ndescs += vq->used_elems[0].ndescs;
 891
 892    vq->inuse -= ndescs;
 893    vq->used_idx += ndescs;
 894    if (vq->used_idx >= vq->vring.num) {
 895        vq->used_idx -= vq->vring.num;
 896        vq->used_wrap_counter ^= 1;
 897    }
 898}
 899
 900void virtqueue_flush(VirtQueue *vq, unsigned int count)
 901{
 902    if (virtio_device_disabled(vq->vdev)) {
 903        vq->inuse -= count;
 904        return;
 905    }
 906
 907    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 908        virtqueue_packed_flush(vq, count);
 909    } else {
 910        virtqueue_split_flush(vq, count);
 911    }
 912}
 913
 914void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 915                    unsigned int len)
 916{
 917    RCU_READ_LOCK_GUARD();
 918    virtqueue_fill(vq, elem, len, 0);
 919    virtqueue_flush(vq, 1);
 920}
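
/*
 * Illustrative sketch only (the handler name and one-byte status payload are
 * assumed; iov_from_buf() and virtio_notify() come from elsewhere in QEMU):
 * the usual completion path is pop, fill the device-writable in_sg buffers,
 * then push with the number of bytes written so the guest sees the correct
 * used length.
 */
#if 0
static void example_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    uint8_t status = 0;
    size_t written;

    if (!elem) {
        return;
    }
    written = iov_from_buf(elem->in_sg, elem->in_num, 0,
                           &status, sizeof(status));
    virtqueue_push(vq, elem, written);
    virtio_notify(vdev, vq);
    g_free(elem);
}
#endif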
 921
 922/* Called within rcu_read_lock().  */
 923static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 924{
 925    uint16_t num_heads = vring_avail_idx(vq) - idx;
 926
 927    /* Check it isn't doing very strange things with descriptor numbers. */
 928    if (num_heads > vq->vring.num) {
 929        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
 930                     idx, vq->shadow_avail_idx);
 931        return -EINVAL;
 932    }
 933    /* On success, callers read a descriptor at vq->last_avail_idx.
 934     * Make sure descriptor read does not bypass avail index read. */
 935    if (num_heads) {
 936        smp_rmb();
 937    }
 938
 939    return num_heads;
 940}
 941
 942/* Called within rcu_read_lock().  */
 943static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
 944                               unsigned int *head)
 945{
 946    /* Grab the next descriptor number they're advertising, and increment
 947     * the index we've seen. */
 948    *head = vring_avail_ring(vq, idx % vq->vring.num);
 949
 950    /* If their number is silly, that's a fatal mistake. */
 951    if (*head >= vq->vring.num) {
 952        virtio_error(vq->vdev, "Guest says index %u is available", *head);
 953        return false;
 954    }
 955
 956    return true;
 957}
 958
 959enum {
 960    VIRTQUEUE_READ_DESC_ERROR = -1,
 961    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
 962    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
 963};
 964
 965static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
 966                                          MemoryRegionCache *desc_cache,
 967                                          unsigned int max, unsigned int *next)
 968{
 969    /* If this descriptor says it doesn't chain, we're done. */
 970    if (!(desc->flags & VRING_DESC_F_NEXT)) {
 971        return VIRTQUEUE_READ_DESC_DONE;
 972    }
 973
 974    /* Check they're not leading us off end of descriptors. */
 975    *next = desc->next;
 976    /* Make sure compiler knows to grab that: we don't want it changing! */
 977    smp_wmb();
 978
 979    if (*next >= max) {
 980        virtio_error(vdev, "Desc next is %u", *next);
 981        return VIRTQUEUE_READ_DESC_ERROR;
 982    }
 983
 984    vring_split_desc_read(vdev, desc, desc_cache, *next);
 985    return VIRTQUEUE_READ_DESC_MORE;
 986}
 987
 988static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
 989                            unsigned int *in_bytes, unsigned int *out_bytes,
 990                            unsigned max_in_bytes, unsigned max_out_bytes)
 991{
 992    VirtIODevice *vdev = vq->vdev;
 993    unsigned int max, idx;
 994    unsigned int total_bufs, in_total, out_total;
 995    VRingMemoryRegionCaches *caches;
 996    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
 997    int64_t len = 0;
 998    int rc;
 999
1000    RCU_READ_LOCK_GUARD();
1001
1002    idx = vq->last_avail_idx;
1003    total_bufs = in_total = out_total = 0;
1004
1005    max = vq->vring.num;
1006    caches = vring_get_region_caches(vq);
1007    if (!caches) {
1008        goto err;
1009    }
1010
1011    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1012        MemoryRegionCache *desc_cache = &caches->desc;
1013        unsigned int num_bufs;
1014        VRingDesc desc;
1015        unsigned int i;
1016
1017        num_bufs = total_bufs;
1018
1019        if (!virtqueue_get_head(vq, idx++, &i)) {
1020            goto err;
1021        }
1022
1023        vring_split_desc_read(vdev, &desc, desc_cache, i);
1024
1025        if (desc.flags & VRING_DESC_F_INDIRECT) {
1026            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1027                virtio_error(vdev, "Invalid size for indirect buffer table");
1028                goto err;
1029            }
1030
1031            /* If we've got too many, that implies a descriptor loop. */
1032            if (num_bufs >= max) {
1033                virtio_error(vdev, "Looped descriptor");
1034                goto err;
1035            }
1036
1037            /* loop over the indirect descriptor table */
1038            len = address_space_cache_init(&indirect_desc_cache,
1039                                           vdev->dma_as,
1040                                           desc.addr, desc.len, false);
1041            desc_cache = &indirect_desc_cache;
1042            if (len < desc.len) {
1043                virtio_error(vdev, "Cannot map indirect buffer");
1044                goto err;
1045            }
1046
1047            max = desc.len / sizeof(VRingDesc);
1048            num_bufs = i = 0;
1049            vring_split_desc_read(vdev, &desc, desc_cache, i);
1050        }
1051
1052        do {
1053            /* If we've got too many, that implies a descriptor loop. */
1054            if (++num_bufs > max) {
1055                virtio_error(vdev, "Looped descriptor");
1056                goto err;
1057            }
1058
1059            if (desc.flags & VRING_DESC_F_WRITE) {
1060                in_total += desc.len;
1061            } else {
1062                out_total += desc.len;
1063            }
1064            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1065                goto done;
1066            }
1067
1068            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1069        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1070
1071        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1072            goto err;
1073        }
1074
1075        if (desc_cache == &indirect_desc_cache) {
1076            address_space_cache_destroy(&indirect_desc_cache);
1077            total_bufs++;
1078        } else {
1079            total_bufs = num_bufs;
1080        }
1081    }
1082
1083    if (rc < 0) {
1084        goto err;
1085    }
1086
1087done:
1088    address_space_cache_destroy(&indirect_desc_cache);
1089    if (in_bytes) {
1090        *in_bytes = in_total;
1091    }
1092    if (out_bytes) {
1093        *out_bytes = out_total;
1094    }
1095    return;
1096
1097err:
1098    in_total = out_total = 0;
1099    goto done;
1100}
1101
1102static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1103                                           VRingPackedDesc *desc,
1104                                           MemoryRegionCache
1105                                           *desc_cache,
1106                                           unsigned int max,
1107                                           unsigned int *next,
1108                                           bool indirect)
1109{
1110    /* If this descriptor says it doesn't chain, we're done. */
1111    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1112        return VIRTQUEUE_READ_DESC_DONE;
1113    }
1114
1115    ++*next;
1116    if (*next == max) {
1117        if (indirect) {
1118            return VIRTQUEUE_READ_DESC_DONE;
1119        } else {
1120            (*next) -= vq->vring.num;
1121        }
1122    }
1123
1124    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1125    return VIRTQUEUE_READ_DESC_MORE;
1126}
1127
1128static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1129                                             unsigned int *in_bytes,
1130                                             unsigned int *out_bytes,
1131                                             unsigned max_in_bytes,
1132                                             unsigned max_out_bytes)
1133{
1134    VirtIODevice *vdev = vq->vdev;
1135    unsigned int max, idx;
1136    unsigned int total_bufs, in_total, out_total;
1137    MemoryRegionCache *desc_cache;
1138    VRingMemoryRegionCaches *caches;
1139    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1140    int64_t len = 0;
1141    VRingPackedDesc desc;
1142    bool wrap_counter;
1143
1144    RCU_READ_LOCK_GUARD();
1145    idx = vq->last_avail_idx;
1146    wrap_counter = vq->last_avail_wrap_counter;
1147    total_bufs = in_total = out_total = 0;
1148
1149    max = vq->vring.num;
1150    caches = vring_get_region_caches(vq);
1151    if (!caches) {
1152        goto err;
1153    }
1154
1155    for (;;) {
1156        unsigned int num_bufs = total_bufs;
1157        unsigned int i = idx;
1158        int rc;
1159
1160        desc_cache = &caches->desc;
1161        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1162        if (!is_desc_avail(desc.flags, wrap_counter)) {
1163            break;
1164        }
1165
1166        if (desc.flags & VRING_DESC_F_INDIRECT) {
1167            if (desc.len % sizeof(VRingPackedDesc)) {
1168                virtio_error(vdev, "Invalid size for indirect buffer table");
1169                goto err;
1170            }
1171
1172            /* If we've got too many, that implies a descriptor loop. */
1173            if (num_bufs >= max) {
1174                virtio_error(vdev, "Looped descriptor");
1175                goto err;
1176            }
1177
1178            /* loop over the indirect descriptor table */
1179            len = address_space_cache_init(&indirect_desc_cache,
1180                                           vdev->dma_as,
1181                                           desc.addr, desc.len, false);
1182            desc_cache = &indirect_desc_cache;
1183            if (len < desc.len) {
1184                virtio_error(vdev, "Cannot map indirect buffer");
1185                goto err;
1186            }
1187
1188            max = desc.len / sizeof(VRingPackedDesc);
1189            num_bufs = i = 0;
1190            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1191        }
1192
1193        do {
1194            /* If we've got too many, that implies a descriptor loop. */
1195            if (++num_bufs > max) {
1196                virtio_error(vdev, "Looped descriptor");
1197                goto err;
1198            }
1199
1200            if (desc.flags & VRING_DESC_F_WRITE) {
1201                in_total += desc.len;
1202            } else {
1203                out_total += desc.len;
1204            }
1205            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1206                goto done;
1207            }
1208
1209            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1210                                                 &i, desc_cache ==
1211                                                 &indirect_desc_cache);
1212        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1213
1214        if (desc_cache == &indirect_desc_cache) {
1215            address_space_cache_destroy(&indirect_desc_cache);
1216            total_bufs++;
1217            idx++;
1218        } else {
1219            idx += num_bufs - total_bufs;
1220            total_bufs = num_bufs;
1221        }
1222
1223        if (idx >= vq->vring.num) {
1224            idx -= vq->vring.num;
1225            wrap_counter ^= 1;
1226        }
1227    }
1228
1229    /* Record the index and wrap counter for a kick we want */
1230    vq->shadow_avail_idx = idx;
1231    vq->shadow_avail_wrap_counter = wrap_counter;
1232done:
1233    address_space_cache_destroy(&indirect_desc_cache);
1234    if (in_bytes) {
1235        *in_bytes = in_total;
1236    }
1237    if (out_bytes) {
1238        *out_bytes = out_total;
1239    }
1240    return;
1241
1242err:
1243    in_total = out_total = 0;
1244    goto done;
1245}
1246
1247void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1248                               unsigned int *out_bytes,
1249                               unsigned max_in_bytes, unsigned max_out_bytes)
1250{
1251    uint16_t desc_size;
1252    VRingMemoryRegionCaches *caches;
1253
1254    if (unlikely(!vq->vring.desc)) {
1255        goto err;
1256    }
1257
1258    caches = vring_get_region_caches(vq);
1259    if (!caches) {
1260        goto err;
1261    }
1262
1263    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1264                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1265    if (caches->desc.len < vq->vring.num * desc_size) {
1266        virtio_error(vq->vdev, "Cannot map descriptor ring");
1267        goto err;
1268    }
1269
1270    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1271        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1272                                         max_in_bytes, max_out_bytes);
1273    } else {
1274        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1275                                        max_in_bytes, max_out_bytes);
1276    }
1277
1278    return;
1279err:
1280    if (in_bytes) {
1281        *in_bytes = 0;
1282    }
1283    if (out_bytes) {
1284        *out_bytes = 0;
1285    }
1286}
1287
1288int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1289                          unsigned int out_bytes)
1290{
1291    unsigned int in_total, out_total;
1292
1293    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1294    return in_bytes <= in_total && out_bytes <= out_total;
1295}
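
/*
 * Illustrative sketch only (the 512-byte requirement is assumed): a device
 * that needs a minimum amount of device-writable buffer space can check for
 * it before committing to a pop.
 */
#if 0
    if (!virtqueue_avail_bytes(vq, 512, 0)) {
        return; /* wait until the guest queues larger buffers */
    }
    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
#endif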
1296
1297static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1298                               hwaddr *addr, struct iovec *iov,
1299                               unsigned int max_num_sg, bool is_write,
1300                               hwaddr pa, size_t sz)
1301{
1302    bool ok = false;
1303    unsigned num_sg = *p_num_sg;
1304    assert(num_sg <= max_num_sg);
1305
1306    if (!sz) {
1307        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1308        goto out;
1309    }
1310
1311    while (sz) {
1312        hwaddr len = sz;
1313
1314        if (num_sg == max_num_sg) {
1315            virtio_error(vdev, "virtio: too many write descriptors in "
1316                               "indirect table");
1317            goto out;
1318        }
1319
1320        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1321                                              is_write ?
1322                                              DMA_DIRECTION_FROM_DEVICE :
1323                                              DMA_DIRECTION_TO_DEVICE);
1324        if (!iov[num_sg].iov_base) {
1325            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1326            goto out;
1327        }
1328
1329        iov[num_sg].iov_len = len;
1330        addr[num_sg] = pa;
1331
1332        sz -= len;
1333        pa += len;
1334        num_sg++;
1335    }
1336    ok = true;
1337
1338out:
1339    *p_num_sg = num_sg;
1340    return ok;
1341}
1342
1343/* Only used by error code paths before we have a VirtQueueElement (therefore
1344 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1345 * yet.
1346 */
1347static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1348                                    struct iovec *iov)
1349{
1350    unsigned int i;
1351
1352    for (i = 0; i < out_num + in_num; i++) {
1353        int is_write = i >= out_num;
1354
1355        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1356        iov++;
1357    }
1358}
1359
1360static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1361                                hwaddr *addr, unsigned int num_sg,
1362                                bool is_write)
1363{
1364    unsigned int i;
1365    hwaddr len;
1366
1367    for (i = 0; i < num_sg; i++) {
1368        len = sg[i].iov_len;
1369        sg[i].iov_base = dma_memory_map(vdev->dma_as,
1370                                        addr[i], &len, is_write ?
1371                                        DMA_DIRECTION_FROM_DEVICE :
1372                                        DMA_DIRECTION_TO_DEVICE);
1373        if (!sg[i].iov_base) {
1374            error_report("virtio: error trying to map MMIO memory");
1375            exit(1);
1376        }
1377        if (len != sg[i].iov_len) {
1378            error_report("virtio: unexpected memory split");
1379            exit(1);
1380        }
1381    }
1382}
1383
1384void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1385{
1386    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1387    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1388                                                                        false);
1389}
1390
1391static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1392{
1393    VirtQueueElement *elem;
1394    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1395    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1396    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1397    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1398    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1399    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1400
1401    assert(sz >= sizeof(VirtQueueElement));
1402    elem = g_malloc(out_sg_end);
1403    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1404    elem->out_num = out_num;
1405    elem->in_num = in_num;
1406    elem->in_addr = (void *)elem + in_addr_ofs;
1407    elem->out_addr = (void *)elem + out_addr_ofs;
1408    elem->in_sg = (void *)elem + in_sg_ofs;
1409    elem->out_sg = (void *)elem + out_sg_ofs;
1410    return elem;
1411}
1412
1413static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1414{
1415    unsigned int i, head, max;
1416    VRingMemoryRegionCaches *caches;
1417    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1418    MemoryRegionCache *desc_cache;
1419    int64_t len;
1420    VirtIODevice *vdev = vq->vdev;
1421    VirtQueueElement *elem = NULL;
1422    unsigned out_num, in_num, elem_entries;
1423    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1424    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1425    VRingDesc desc;
1426    int rc;
1427
1428    RCU_READ_LOCK_GUARD();
1429    if (virtio_queue_empty_rcu(vq)) {
1430        goto done;
1431    }
1432    /* Needed after virtio_queue_empty(), see comment in
1433     * virtqueue_num_heads(). */
1434    smp_rmb();
1435
 1436    /* When we start there are no input or output descriptors yet. */
1437    out_num = in_num = elem_entries = 0;
1438
1439    max = vq->vring.num;
1440
1441    if (vq->inuse >= vq->vring.num) {
1442        virtio_error(vdev, "Virtqueue size exceeded");
1443        goto done;
1444    }
1445
1446    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1447        goto done;
1448    }
1449
1450    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1451        vring_set_avail_event(vq, vq->last_avail_idx);
1452    }
1453
1454    i = head;
1455
1456    caches = vring_get_region_caches(vq);
1457    if (!caches) {
1458        virtio_error(vdev, "Region caches not initialized");
1459        goto done;
1460    }
1461
1462    if (caches->desc.len < max * sizeof(VRingDesc)) {
1463        virtio_error(vdev, "Cannot map descriptor ring");
1464        goto done;
1465    }
1466
1467    desc_cache = &caches->desc;
1468    vring_split_desc_read(vdev, &desc, desc_cache, i);
1469    if (desc.flags & VRING_DESC_F_INDIRECT) {
1470        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1471            virtio_error(vdev, "Invalid size for indirect buffer table");
1472            goto done;
1473        }
1474
1475        /* loop over the indirect descriptor table */
1476        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1477                                       desc.addr, desc.len, false);
1478        desc_cache = &indirect_desc_cache;
1479        if (len < desc.len) {
1480            virtio_error(vdev, "Cannot map indirect buffer");
1481            goto done;
1482        }
1483
1484        max = desc.len / sizeof(VRingDesc);
1485        i = 0;
1486        vring_split_desc_read(vdev, &desc, desc_cache, i);
1487    }
1488
1489    /* Collect all the descriptors */
1490    do {
1491        bool map_ok;
1492
1493        if (desc.flags & VRING_DESC_F_WRITE) {
1494            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1495                                        iov + out_num,
1496                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1497                                        desc.addr, desc.len);
1498        } else {
1499            if (in_num) {
1500                virtio_error(vdev, "Incorrect order for descriptors");
1501                goto err_undo_map;
1502            }
1503            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1504                                        VIRTQUEUE_MAX_SIZE, false,
1505                                        desc.addr, desc.len);
1506        }
1507        if (!map_ok) {
1508            goto err_undo_map;
1509        }
1510
1511        /* If we've got too many, that implies a descriptor loop. */
1512        if (++elem_entries > max) {
1513            virtio_error(vdev, "Looped descriptor");
1514            goto err_undo_map;
1515        }
1516
1517        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1518    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1519
1520    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1521        goto err_undo_map;
1522    }
1523
1524    /* Now copy what we have collected and mapped */
1525    elem = virtqueue_alloc_element(sz, out_num, in_num);
1526    elem->index = head;
1527    elem->ndescs = 1;
1528    for (i = 0; i < out_num; i++) {
1529        elem->out_addr[i] = addr[i];
1530        elem->out_sg[i] = iov[i];
1531    }
1532    for (i = 0; i < in_num; i++) {
1533        elem->in_addr[i] = addr[out_num + i];
1534        elem->in_sg[i] = iov[out_num + i];
1535    }
1536
1537    vq->inuse++;
1538
1539    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1540done:
1541    address_space_cache_destroy(&indirect_desc_cache);
1542
1543    return elem;
1544
1545err_undo_map:
1546    virtqueue_undo_map_desc(out_num, in_num, iov);
1547    goto done;
1548}
1549
1550static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1551{
1552    unsigned int i, max;
1553    VRingMemoryRegionCaches *caches;
1554    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1555    MemoryRegionCache *desc_cache;
1556    int64_t len;
1557    VirtIODevice *vdev = vq->vdev;
1558    VirtQueueElement *elem = NULL;
1559    unsigned out_num, in_num, elem_entries;
1560    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1561    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1562    VRingPackedDesc desc;
1563    uint16_t id;
1564    int rc;
1565
1566    RCU_READ_LOCK_GUARD();
1567    if (virtio_queue_packed_empty_rcu(vq)) {
1568        goto done;
1569    }
1570
 1571    /* When we start there are no input or output descriptors yet. */
1572    out_num = in_num = elem_entries = 0;
1573
1574    max = vq->vring.num;
1575
1576    if (vq->inuse >= vq->vring.num) {
1577        virtio_error(vdev, "Virtqueue size exceeded");
1578        goto done;
1579    }
1580
1581    i = vq->last_avail_idx;
1582
1583    caches = vring_get_region_caches(vq);
1584    if (!caches) {
1585        virtio_error(vdev, "Region caches not initialized");
1586        goto done;
1587    }
1588
1589    if (caches->desc.len < max * sizeof(VRingDesc)) {
1590        virtio_error(vdev, "Cannot map descriptor ring");
1591        goto done;
1592    }
1593
1594    desc_cache = &caches->desc;
1595    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1596    id = desc.id;
1597    if (desc.flags & VRING_DESC_F_INDIRECT) {
1598        if (desc.len % sizeof(VRingPackedDesc)) {
1599            virtio_error(vdev, "Invalid size for indirect buffer table");
1600            goto done;
1601        }
1602
1603        /* loop over the indirect descriptor table */
1604        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1605                                       desc.addr, desc.len, false);
1606        desc_cache = &indirect_desc_cache;
1607        if (len < desc.len) {
1608            virtio_error(vdev, "Cannot map indirect buffer");
1609            goto done;
1610        }
1611
1612        max = desc.len / sizeof(VRingPackedDesc);
1613        i = 0;
1614        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1615    }
1616
1617    /* Collect all the descriptors */
1618    do {
1619        bool map_ok;
1620
1621        if (desc.flags & VRING_DESC_F_WRITE) {
1622            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1623                                        iov + out_num,
1624                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1625                                        desc.addr, desc.len);
1626        } else {
1627            if (in_num) {
1628                virtio_error(vdev, "Incorrect order for descriptors");
1629                goto err_undo_map;
1630            }
1631            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1632                                        VIRTQUEUE_MAX_SIZE, false,
1633                                        desc.addr, desc.len);
1634        }
1635        if (!map_ok) {
1636            goto err_undo_map;
1637        }
1638
1639        /* If we've got too many, that implies a descriptor loop. */
1640        if (++elem_entries > max) {
1641            virtio_error(vdev, "Looped descriptor");
1642            goto err_undo_map;
1643        }
1644
1645        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1646                                             desc_cache ==
1647                                             &indirect_desc_cache);
1648    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1649
1650    /* Now copy what we have collected and mapped */
1651    elem = virtqueue_alloc_element(sz, out_num, in_num);
1652    for (i = 0; i < out_num; i++) {
1653        elem->out_addr[i] = addr[i];
1654        elem->out_sg[i] = iov[i];
1655    }
1656    for (i = 0; i < in_num; i++) {
1657        elem->in_addr[i] = addr[out_num + i];
1658        elem->in_sg[i] = iov[out_num + i];
1659    }
1660
1661    elem->index = id;
1662    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1663    vq->last_avail_idx += elem->ndescs;
1664    vq->inuse += elem->ndescs;
1665
1666    if (vq->last_avail_idx >= vq->vring.num) {
1667        vq->last_avail_idx -= vq->vring.num;
1668        vq->last_avail_wrap_counter ^= 1;
1669    }
1670
1671    vq->shadow_avail_idx = vq->last_avail_idx;
1672    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1673
1674    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1675done:
1676    address_space_cache_destroy(&indirect_desc_cache);
1677
1678    return elem;
1679
1680err_undo_map:
1681    virtqueue_undo_map_desc(out_num, in_num, iov);
1682    goto done;
1683}
1684
1685void *virtqueue_pop(VirtQueue *vq, size_t sz)
1686{
1687    if (virtio_device_disabled(vq->vdev)) {
1688        return NULL;
1689    }
1690
1691    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1692        return virtqueue_packed_pop(vq, sz);
1693    } else {
1694        return virtqueue_split_pop(vq, sz);
1695    }
1696}
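
/*
 * Illustrative sketch only (ExampleRequest is an assumed name): devices
 * normally embed VirtQueueElement as the first member of their own request
 * struct and pass the enclosing size to virtqueue_pop(), so that
 * virtqueue_alloc_element() places the flexible sg/addr arrays after the
 * device-specific fields.
 */
#if 0
typedef struct ExampleRequest {
    VirtQueueElement elem;      /* must be the first member */
    uint32_t status;            /* device-specific state follows */
} ExampleRequest;

ExampleRequest *req = virtqueue_pop(vq, sizeof(ExampleRequest));
#endif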
1697
1698static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1699{
1700    VRingMemoryRegionCaches *caches;
1701    MemoryRegionCache *desc_cache;
1702    unsigned int dropped = 0;
1703    VirtQueueElement elem = {};
1704    VirtIODevice *vdev = vq->vdev;
1705    VRingPackedDesc desc;
1706
1707    caches = vring_get_region_caches(vq);
1708    if (!caches) {
1709        return 0;
1710    }
1711
1712    desc_cache = &caches->desc;
1713
1714    virtio_queue_set_notification(vq, 0);
1715
1716    while (vq->inuse < vq->vring.num) {
1717        unsigned int idx = vq->last_avail_idx;
1718        /*
1719         * Works similarly to virtqueue_pop but does not map buffers
1720         * and does not allocate any memory.
1721         */
1722        vring_packed_desc_read(vdev, &desc, desc_cache,
1723                               vq->last_avail_idx, true);
1724        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1725            break;
1726        }
1727        elem.index = desc.id;
1728        elem.ndescs = 1;
1729        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1730                                               vq->vring.num, &idx, false)) {
1731            ++elem.ndescs;
1732        }
1733        /*
1734         * immediately push the element, nothing to unmap
1735         * as both in_num and out_num are set to 0.
1736         */
1737        virtqueue_push(vq, &elem, 0);
1738        dropped++;
1739        vq->last_avail_idx += elem.ndescs;
1740        if (vq->last_avail_idx >= vq->vring.num) {
1741            vq->last_avail_idx -= vq->vring.num;
1742            vq->last_avail_wrap_counter ^= 1;
1743        }
1744    }
1745
1746    return dropped;
1747}
1748
1749static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1750{
1751    unsigned int dropped = 0;
1752    VirtQueueElement elem = {};
1753    VirtIODevice *vdev = vq->vdev;
1754    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1755
1756    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1757        /* Works similarly to virtqueue_pop but does not map buffers
1758         * and does not allocate any memory. */
1759        smp_rmb();
1760        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1761            break;
1762        }
1763        vq->inuse++;
1764        vq->last_avail_idx++;
1765        if (fEventIdx) {
1766            vring_set_avail_event(vq, vq->last_avail_idx);
1767        }
1768        /* immediately push the element, nothing to unmap
1769         * as both in_num and out_num are set to 0 */
1770        virtqueue_push(vq, &elem, 0);
1771        dropped++;
1772    }
1773
1774    return dropped;
1775}
1776
1777/* virtqueue_drop_all:
1778 * @vq: The #VirtQueue
1779 * Drops all queued buffers and indicates them to the guest
1780 * as if they are done. Useful when buffers can not be
1781 * processed but must be returned to the guest.
1782 */
1783unsigned int virtqueue_drop_all(VirtQueue *vq)
1784{
1785    struct VirtIODevice *vdev = vq->vdev;
1786
1787    if (virtio_device_disabled(vq->vdev)) {
1788        return 0;
1789    }
1790
1791    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1792        return virtqueue_packed_drop_all(vq);
1793    } else {
1794        return virtqueue_split_drop_all(vq);
1795    }
1796}
1797
1798/* Reading and writing a structure directly to QEMUFile is *awful*, but
1799 * it is what QEMU has always done by mistake.  We can change it sooner
1800 * or later by bumping the version number of the affected vm states.
1801 * In the meantime, since the in-memory layout of VirtQueueElement
1802 * has changed, we need to marshal to and from the layout that was
1803 * used before the change.
1804 */
1805typedef struct VirtQueueElementOld {
1806    unsigned int index;
1807    unsigned int out_num;
1808    unsigned int in_num;
1809    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1810    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1811    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1812    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1813} VirtQueueElementOld;
1814
1815void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1816{
1817    VirtQueueElement *elem;
1818    VirtQueueElementOld data;
1819    int i;
1820
1821    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1822
1823    /* TODO: teach all callers that this can fail, and return failure instead
1824     * of asserting here.
1825     * This is just one thing (there are probably more) that must be
1826     * fixed before we can allow NDEBUG compilation.
1827     */
1828    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1829    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1830
1831    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1832    elem->index = data.index;
1833
1834    for (i = 0; i < elem->in_num; i++) {
1835        elem->in_addr[i] = data.in_addr[i];
1836    }
1837
1838    for (i = 0; i < elem->out_num; i++) {
1839        elem->out_addr[i] = data.out_addr[i];
1840    }
1841
1842    for (i = 0; i < elem->in_num; i++) {
1843        /* Base is overwritten by virtqueue_map.  */
1844        elem->in_sg[i].iov_base = 0;
1845        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1846    }
1847
1848    for (i = 0; i < elem->out_num; i++) {
1849        /* Base is overwritten by virtqueue_map.  */
1850        elem->out_sg[i].iov_base = 0;
1851        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1852    }
1853
1854    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1855        qemu_get_be32s(f, &elem->ndescs);
1856    }
1857
1858    virtqueue_map(vdev, elem);
1859    return elem;
1860}
1861
1862void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1863                                VirtQueueElement *elem)
1864{
1865    VirtQueueElementOld data;
1866    int i;
1867
1868    memset(&data, 0, sizeof(data));
1869    data.index = elem->index;
1870    data.in_num = elem->in_num;
1871    data.out_num = elem->out_num;
1872
1873    for (i = 0; i < elem->in_num; i++) {
1874        data.in_addr[i] = elem->in_addr[i];
1875    }
1876
1877    for (i = 0; i < elem->out_num; i++) {
1878        data.out_addr[i] = elem->out_addr[i];
1879    }
1880
1881    for (i = 0; i < elem->in_num; i++) {
1882        /* Base is overwritten by virtqueue_map when loading.  Do not
1883         * save it, as it would leak the QEMU address space layout.  */
1884        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1885    }
1886
1887    for (i = 0; i < elem->out_num; i++) {
1888        /* Do not save iov_base as above.  */
1889        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1890    }
1891
1892    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1893        qemu_put_be32s(f, &elem->ndescs);
1894    }
1895
1896    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1897}
1898
1899/* virtio device */
1900static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1901{
1902    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1903    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1904
1905    if (virtio_device_disabled(vdev)) {
1906        return;
1907    }
1908
1909    if (k->notify) {
1910        k->notify(qbus->parent, vector);
1911    }
1912}
1913
1914void virtio_update_irq(VirtIODevice *vdev)
1915{
1916    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1917}
1918
1919static int virtio_validate_features(VirtIODevice *vdev)
1920{
1921    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1922
1923    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1924        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1925        return -EFAULT;
1926    }
1927
1928    if (k->validate_features) {
1929        return k->validate_features(vdev);
1930    } else {
1931        return 0;
1932    }
1933}
1934
1935int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1936{
1937    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1938    trace_virtio_set_status(vdev, val);
1939
1940    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1941        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1942            val & VIRTIO_CONFIG_S_FEATURES_OK) {
1943            int ret = virtio_validate_features(vdev);
1944
1945            if (ret) {
1946                return ret;
1947            }
1948        }
1949    }
1950
1951    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1952        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1953        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1954    }
1955
1956    if (k->set_status) {
1957        k->set_status(vdev, val);
1958    }
1959    vdev->status = val;
1960
1961    return 0;
1962}
1963
1964static enum virtio_device_endian virtio_default_endian(void)
1965{
1966    if (target_words_bigendian()) {
1967        return VIRTIO_DEVICE_ENDIAN_BIG;
1968    } else {
1969        return VIRTIO_DEVICE_ENDIAN_LITTLE;
1970    }
1971}
1972
1973static enum virtio_device_endian virtio_current_cpu_endian(void)
1974{
1975    if (cpu_virtio_is_big_endian(current_cpu)) {
1976        return VIRTIO_DEVICE_ENDIAN_BIG;
1977    } else {
1978        return VIRTIO_DEVICE_ENDIAN_LITTLE;
1979    }
1980}
1981
1982void virtio_reset(void *opaque)
1983{
1984    VirtIODevice *vdev = opaque;
1985    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1986    int i;
1987
1988    virtio_set_status(vdev, 0);
1989    if (current_cpu) {
1990        /* Guest initiated reset */
1991        vdev->device_endian = virtio_current_cpu_endian();
1992    } else {
1993        /* System reset */
1994        vdev->device_endian = virtio_default_endian();
1995    }
1996
1997    if (k->reset) {
1998        k->reset(vdev);
1999    }
2000
2001    vdev->start_on_kick = false;
2002    vdev->started = false;
2003    vdev->broken = false;
2004    vdev->guest_features = 0;
2005    vdev->queue_sel = 0;
2006    vdev->status = 0;
2007    vdev->disabled = false;
2008    qatomic_set(&vdev->isr, 0);
2009    vdev->config_vector = VIRTIO_NO_VECTOR;
2010    virtio_notify_vector(vdev, vdev->config_vector);
2011
2012    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2013        vdev->vq[i].vring.desc = 0;
2014        vdev->vq[i].vring.avail = 0;
2015        vdev->vq[i].vring.used = 0;
2016        vdev->vq[i].last_avail_idx = 0;
2017        vdev->vq[i].shadow_avail_idx = 0;
2018        vdev->vq[i].used_idx = 0;
2019        vdev->vq[i].last_avail_wrap_counter = true;
2020        vdev->vq[i].shadow_avail_wrap_counter = true;
2021        vdev->vq[i].used_wrap_counter = true;
2022        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2023        vdev->vq[i].signalled_used = 0;
2024        vdev->vq[i].signalled_used_valid = false;
2025        vdev->vq[i].notification = true;
2026        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2027        vdev->vq[i].inuse = 0;
2028        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2029    }
2030}
2031
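/*
 * Config space accessors used by legacy transports.  Reads refresh the
 * device's config buffer through get_config() before loading from it;
 * out-of-range reads return (uint32_t)-1 and out-of-range writes are
 * silently dropped.
 */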
2032uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2033{
2034    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2035    uint8_t val;
2036
2037    if (addr + sizeof(val) > vdev->config_len) {
2038        return (uint32_t)-1;
2039    }
2040
2041    k->get_config(vdev, vdev->config);
2042
2043    val = ldub_p(vdev->config + addr);
2044    return val;
2045}
2046
2047uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2048{
2049    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2050    uint16_t val;
2051
2052    if (addr + sizeof(val) > vdev->config_len) {
2053        return (uint32_t)-1;
2054    }
2055
2056    k->get_config(vdev, vdev->config);
2057
2058    val = lduw_p(vdev->config + addr);
2059    return val;
2060}
2061
2062uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2063{
2064    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2065    uint32_t val;
2066
2067    if (addr + sizeof(val) > vdev->config_len) {
2068        return (uint32_t)-1;
2069    }
2070
2071    k->get_config(vdev, vdev->config);
2072
2073    val = ldl_p(vdev->config + addr);
2074    return val;
2075}
2076
2077void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2078{
2079    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2080    uint8_t val = data;
2081
2082    if (addr + sizeof(val) > vdev->config_len) {
2083        return;
2084    }
2085
2086    stb_p(vdev->config + addr, val);
2087
2088    if (k->set_config) {
2089        k->set_config(vdev, vdev->config);
2090    }
2091}
2092
2093void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2094{
2095    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2096    uint16_t val = data;
2097
2098    if (addr + sizeof(val) > vdev->config_len) {
2099        return;
2100    }
2101
2102    stw_p(vdev->config + addr, val);
2103
2104    if (k->set_config) {
2105        k->set_config(vdev, vdev->config);
2106    }
2107}
2108
2109void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2110{
2111    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2112    uint32_t val = data;
2113
2114    if (addr + sizeof(val) > vdev->config_len) {
2115        return;
2116    }
2117
2118    stl_p(vdev->config + addr, val);
2119
2120    if (k->set_config) {
2121        k->set_config(vdev, vdev->config);
2122    }
2123}
2124
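/*
 * VIRTIO 1.0 ("modern") config space accessors.  These behave like the
 * legacy helpers above but use explicit little-endian loads and stores,
 * since modern config space is always little-endian.
 */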
2125uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2126{
2127    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2128    uint8_t val;
2129
2130    if (addr + sizeof(val) > vdev->config_len) {
2131        return (uint32_t)-1;
2132    }
2133
2134    k->get_config(vdev, vdev->config);
2135
2136    val = ldub_p(vdev->config + addr);
2137    return val;
2138}
2139
2140uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2141{
2142    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2143    uint16_t val;
2144
2145    if (addr + sizeof(val) > vdev->config_len) {
2146        return (uint32_t)-1;
2147    }
2148
2149    k->get_config(vdev, vdev->config);
2150
2151    val = lduw_le_p(vdev->config + addr);
2152    return val;
2153}
2154
2155uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2156{
2157    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2158    uint32_t val;
2159
2160    if (addr + sizeof(val) > vdev->config_len) {
2161        return (uint32_t)-1;
2162    }
2163
2164    k->get_config(vdev, vdev->config);
2165
2166    val = ldl_le_p(vdev->config + addr);
2167    return val;
2168}
2169
2170void virtio_config_modern_writeb(VirtIODevice *vdev,
2171                                 uint32_t addr, uint32_t data)
2172{
2173    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2174    uint8_t val = data;
2175
2176    if (addr + sizeof(val) > vdev->config_len) {
2177        return;
2178    }
2179
2180    stb_p(vdev->config + addr, val);
2181
2182    if (k->set_config) {
2183        k->set_config(vdev, vdev->config);
2184    }
2185}
2186
2187void virtio_config_modern_writew(VirtIODevice *vdev,
2188                                 uint32_t addr, uint32_t data)
2189{
2190    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2191    uint16_t val = data;
2192
2193    if (addr + sizeof(val) > vdev->config_len) {
2194        return;
2195    }
2196
2197    stw_le_p(vdev->config + addr, val);
2198
2199    if (k->set_config) {
2200        k->set_config(vdev, vdev->config);
2201    }
2202}
2203
2204void virtio_config_modern_writel(VirtIODevice *vdev,
2205                                 uint32_t addr, uint32_t data)
2206{
2207    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2208    uint32_t val = data;
2209
2210    if (addr + sizeof(val) > vdev->config_len) {
2211        return;
2212    }
2213
2214    stl_le_p(vdev->config + addr, val);
2215
2216    if (k->set_config) {
2217        k->set_config(vdev, vdev->config);
2218    }
2219}
2220
2221void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2222{
2223    if (!vdev->vq[n].vring.num) {
2224        return;
2225    }
2226    vdev->vq[n].vring.desc = addr;
2227    virtio_queue_update_rings(vdev, n);
2228}
2229
2230hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2231{
2232    return vdev->vq[n].vring.desc;
2233}
2234
2235void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2236                            hwaddr avail, hwaddr used)
2237{
2238    if (!vdev->vq[n].vring.num) {
2239        return;
2240    }
2241    vdev->vq[n].vring.desc = desc;
2242    vdev->vq[n].vring.avail = avail;
2243    vdev->vq[n].vring.used = used;
2244    virtio_init_region_cache(vdev, n);
2245}
2246
2247void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2248{
2249    /* Don't allow guest to flip queue between existent and
2250     * nonexistent states, or to set it to an invalid size.
2251     */
2252    if (!!num != !!vdev->vq[n].vring.num ||
2253        num > VIRTQUEUE_MAX_SIZE ||
2254        num < 0) {
2255        return;
2256    }
2257    vdev->vq[n].vring.num = num;
2258}
2259
2260VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2261{
2262    return QLIST_FIRST(&vdev->vector_queues[vector]);
2263}
2264
2265VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2266{
2267    return QLIST_NEXT(vq, node);
2268}
2269
2270int virtio_queue_get_num(VirtIODevice *vdev, int n)
2271{
2272    return vdev->vq[n].vring.num;
2273}
2274
2275int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2276{
2277    return vdev->vq[n].vring.num_default;
2278}
2279
2280int virtio_get_num_queues(VirtIODevice *vdev)
2281{
2282    int i;
2283
2284    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2285        if (!virtio_queue_get_num(vdev, i)) {
2286            break;
2287        }
2288    }
2289
2290    return i;
2291}
2292
2293void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2294{
2295    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2296    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2297
2298    /* virtio-1 compliant devices cannot change the alignment */
2299    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2300        error_report("tried to modify queue alignment for virtio-1 device");
2301        return;
2302    }
2303    /* Check that the transport told us it was going to do this
2304     * (so a buggy transport will immediately assert rather than
2305     * silently failing to migrate this state)
2306     */
2307    assert(k->has_variable_vring_alignment);
2308
2309    if (align) {
2310        vdev->vq[n].vring.align = align;
2311        virtio_queue_update_rings(vdev, n);
2312    }
2313}
2314
2315static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2316{
2317    bool ret = false;
2318
2319    if (vq->vring.desc && vq->handle_aio_output) {
2320        VirtIODevice *vdev = vq->vdev;
2321
2322        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2323        ret = vq->handle_aio_output(vdev, vq);
2324
2325        if (unlikely(vdev->start_on_kick)) {
2326            virtio_set_started(vdev, true);
2327        }
2328    }
2329
2330    return ret;
2331}
2332
2333static void virtio_queue_notify_vq(VirtQueue *vq)
2334{
2335    if (vq->vring.desc && vq->handle_output) {
2336        VirtIODevice *vdev = vq->vdev;
2337
2338        if (unlikely(vdev->broken)) {
2339            return;
2340        }
2341
2342        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2343        vq->handle_output(vdev, vq);
2344
2345        if (unlikely(vdev->start_on_kick)) {
2346            virtio_set_started(vdev, true);
2347        }
2348    }
2349}
2350
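/*
 * Guest kick for queue @n.  If a host notifier (e.g. an ioeventfd) is in
 * use, signal it so the handler runs in its own context; otherwise call
 * the handler directly.  Kicks on broken or unconfigured queues are
 * ignored.
 */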
2351void virtio_queue_notify(VirtIODevice *vdev, int n)
2352{
2353    VirtQueue *vq = &vdev->vq[n];
2354
2355    if (unlikely(!vq->vring.desc || vdev->broken)) {
2356        return;
2357    }
2358
2359    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2360    if (vq->host_notifier_enabled) {
2361        event_notifier_set(&vq->host_notifier);
2362    } else if (vq->handle_output) {
2363        vq->handle_output(vdev, vq);
2364
2365        if (unlikely(vdev->start_on_kick)) {
2366            virtio_set_started(vdev, true);
2367        }
2368    }
2369}
2370
2371uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2372{
2373    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2374        VIRTIO_NO_VECTOR;
2375}
2376
2377void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2378{
2379    VirtQueue *vq = &vdev->vq[n];
2380
2381    if (n < VIRTIO_QUEUE_MAX) {
2382        if (vdev->vector_queues &&
2383            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2384            QLIST_REMOVE(vq, node);
2385        }
2386        vdev->vq[n].vector = vector;
2387        if (vdev->vector_queues &&
2388            vector != VIRTIO_NO_VECTOR) {
2389            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2390        }
2391    }
2392}
2393
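/*
 * Allocate the first unused virtqueue slot and wire up its handler.
 * Aborts if all VIRTIO_QUEUE_MAX slots are taken or the requested size
 * exceeds VIRTQUEUE_MAX_SIZE.
 */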
2394VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2395                            VirtIOHandleOutput handle_output)
2396{
2397    int i;
2398
2399    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2400        if (vdev->vq[i].vring.num == 0)
2401            break;
2402    }
2403
2404    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2405        abort();
2406
2407    vdev->vq[i].vring.num = queue_size;
2408    vdev->vq[i].vring.num_default = queue_size;
2409    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2410    vdev->vq[i].handle_output = handle_output;
2411    vdev->vq[i].handle_aio_output = NULL;
2412    vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2413                                       queue_size);
2414
2415    return &vdev->vq[i];
2416}
2417
2418void virtio_delete_queue(VirtQueue *vq)
2419{
2420    vq->vring.num = 0;
2421    vq->vring.num_default = 0;
2422    vq->handle_output = NULL;
2423    vq->handle_aio_output = NULL;
2424    g_free(vq->used_elems);
2425    vq->used_elems = NULL;
2426    virtio_virtqueue_reset_region_cache(vq);
2427}
2428
2429void virtio_del_queue(VirtIODevice *vdev, int n)
2430{
2431    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2432        abort();
2433    }
2434
2435    virtio_delete_queue(&vdev->vq[n]);
2436}
2437
2438static void virtio_set_isr(VirtIODevice *vdev, int value)
2439{
2440    uint8_t old = qatomic_read(&vdev->isr);
2441
2442    /* Do not write ISR if it does not change, so that its cacheline remains
2443     * shared in the common case where the guest does not read it.
2444     */
2445    if ((old & value) != value) {
2446        qatomic_or(&vdev->isr, value);
2447    }
2448}
2449
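/*
 * Decide whether the guest needs an interrupt for a split virtqueue.
 * Without VIRTIO_RING_F_EVENT_IDX this is just the NO_INTERRUPT flag in
 * the avail ring; with it, compare the guest's used_event index against
 * the range of used entries published since the last notification.
 */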
2450/* Called within rcu_read_lock(). */
2451static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2452{
2453    uint16_t old, new;
2454    bool v;
2455    /* We need to expose used array entries before checking used event. */
2456    smp_mb();
2457    /* Always notify when the queue is empty, if the feature was acknowledged */
2458    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2459        !vq->inuse && virtio_queue_empty(vq)) {
2460        return true;
2461    }
2462
2463    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2464        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2465    }
2466
2467    v = vq->signalled_used_valid;
2468    vq->signalled_used_valid = true;
2469    old = vq->signalled_used;
2470    new = vq->signalled_used = vq->used_idx;
2471    return !v || vring_need_event(vring_get_used_event(vq), new, old);
2472}
2473
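/*
 * Packed-ring variant of vring_need_event().  @off_wrap encodes the
 * guest's event suppression point: bit 15 is the wrap counter and the
 * low 15 bits are the descriptor offset.  If the wrap counters differ,
 * shift the offset down by the ring size before comparing.
 */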
2474static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2475                                    uint16_t off_wrap, uint16_t new,
2476                                    uint16_t old)
2477{
2478    int off = off_wrap & ~(1 << 15);
2479
2480    if (wrap != off_wrap >> 15) {
2481        off -= vq->vring.num;
2482    }
2483
2484    return vring_need_event(off, new, old);
2485}
2486
2487/* Called within rcu_read_lock(). */
2488static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2489{
2490    VRingPackedDescEvent e;
2491    uint16_t old, new;
2492    bool v;
2493    VRingMemoryRegionCaches *caches;
2494
2495    caches = vring_get_region_caches(vq);
2496    if (!caches) {
2497        return false;
2498    }
2499
2500    vring_packed_event_read(vdev, &caches->avail, &e);
2501
2502    old = vq->signalled_used;
2503    new = vq->signalled_used = vq->used_idx;
2504    v = vq->signalled_used_valid;
2505    vq->signalled_used_valid = true;
2506
2507    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2508        return false;
2509    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2510        return true;
2511    }
2512
2513    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2514                                         e.off_wrap, new, old);
2515}
2516
2517/* Called within rcu_read_lock().  */
2518static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2519{
2520    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2521        return virtio_packed_should_notify(vdev, vq);
2522    } else {
2523        return virtio_split_should_notify(vdev, vq);
2524    }
2525}
2526
2527void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2528{
2529    WITH_RCU_READ_LOCK_GUARD() {
2530        if (!virtio_should_notify(vdev, vq)) {
2531            return;
2532        }
2533    }
2534
2535    trace_virtio_notify_irqfd(vdev, vq);
2536
2537    /*
2538     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2539     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2540     * incorrectly polling this bit during crashdump and hibernation
2541     * in MSI mode, causing a hang if this bit is never updated.
2542     * Recent releases of Windows do not really shut down, but rather
2543     * log out and hibernate to make the next startup faster.  Hence,
2544     * this manifested as a more serious hang during shutdown with
2545     * those versions of Windows.
2546     * The next driver release, from 2016, fixed this problem, so working
2547     * around it is not a must, but it's easy to do, so let's do it here.
2548     *
2549     * Note: it's safe to update ISR from any thread as it was switched
2550     * to an atomic operation.
2551     */
2552    virtio_set_isr(vq->vdev, 0x1);
2553    event_notifier_set(&vq->guest_notifier);
2554}
2555
2556static void virtio_irq(VirtQueue *vq)
2557{
2558    virtio_set_isr(vq->vdev, 0x1);
2559    virtio_notify_vector(vq->vdev, vq->vector);
2560}
2561
2562void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2563{
2564    WITH_RCU_READ_LOCK_GUARD() {
2565        if (!virtio_should_notify(vdev, vq)) {
2566            return;
2567        }
2568    }
2569
2570    trace_virtio_notify(vdev, vq);
2571    virtio_irq(vq);
2572}
2573
2574void virtio_notify_config(VirtIODevice *vdev)
2575{
2576    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2577        return;
2578
2579    virtio_set_isr(vdev, 0x3);
2580    vdev->generation++;
2581    virtio_notify_vector(vdev, vdev->config_vector);
2582}
2583
2584static bool virtio_device_endian_needed(void *opaque)
2585{
2586    VirtIODevice *vdev = opaque;
2587
2588    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2589    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2590        return vdev->device_endian != virtio_default_endian();
2591    }
2592    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2593    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2594}
2595
2596static bool virtio_64bit_features_needed(void *opaque)
2597{
2598    VirtIODevice *vdev = opaque;
2599
2600    return (vdev->host_features >> 32) != 0;
2601}
2602
2603static bool virtio_virtqueue_needed(void *opaque)
2604{
2605    VirtIODevice *vdev = opaque;
2606
2607    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2608}
2609
2610static bool virtio_packed_virtqueue_needed(void *opaque)
2611{
2612    VirtIODevice *vdev = opaque;
2613
2614    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2615}
2616
2617static bool virtio_ringsize_needed(void *opaque)
2618{
2619    VirtIODevice *vdev = opaque;
2620    int i;
2621
2622    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2623        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2624            return true;
2625        }
2626    }
2627    return false;
2628}
2629
2630static bool virtio_extra_state_needed(void *opaque)
2631{
2632    VirtIODevice *vdev = opaque;
2633    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2634    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2635
2636    return k->has_extra_state &&
2637        k->has_extra_state(qbus->parent);
2638}
2639
2640static bool virtio_broken_needed(void *opaque)
2641{
2642    VirtIODevice *vdev = opaque;
2643
2644    return vdev->broken;
2645}
2646
2647static bool virtio_started_needed(void *opaque)
2648{
2649    VirtIODevice *vdev = opaque;
2650
2651    return vdev->started;
2652}
2653
2654static bool virtio_disabled_needed(void *opaque)
2655{
2656    VirtIODevice *vdev = opaque;
2657
2658    return vdev->disabled;
2659}
2660
2661static const VMStateDescription vmstate_virtqueue = {
2662    .name = "virtqueue_state",
2663    .version_id = 1,
2664    .minimum_version_id = 1,
2665    .fields = (VMStateField[]) {
2666        VMSTATE_UINT64(vring.avail, struct VirtQueue),
2667        VMSTATE_UINT64(vring.used, struct VirtQueue),
2668        VMSTATE_END_OF_LIST()
2669    }
2670};
2671
2672static const VMStateDescription vmstate_packed_virtqueue = {
2673    .name = "packed_virtqueue_state",
2674    .version_id = 1,
2675    .minimum_version_id = 1,
2676    .fields = (VMStateField[]) {
2677        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2678        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2679        VMSTATE_UINT16(used_idx, struct VirtQueue),
2680        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2681        VMSTATE_UINT32(inuse, struct VirtQueue),
2682        VMSTATE_END_OF_LIST()
2683    }
2684};
2685
2686static const VMStateDescription vmstate_virtio_virtqueues = {
2687    .name = "virtio/virtqueues",
2688    .version_id = 1,
2689    .minimum_version_id = 1,
2690    .needed = &virtio_virtqueue_needed,
2691    .fields = (VMStateField[]) {
2692        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2693                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2694        VMSTATE_END_OF_LIST()
2695    }
2696};
2697
2698static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2699    .name = "virtio/packed_virtqueues",
2700    .version_id = 1,
2701    .minimum_version_id = 1,
2702    .needed = &virtio_packed_virtqueue_needed,
2703    .fields = (VMStateField[]) {
2704        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2705                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2706        VMSTATE_END_OF_LIST()
2707    }
2708};
2709
2710static const VMStateDescription vmstate_ringsize = {
2711    .name = "ringsize_state",
2712    .version_id = 1,
2713    .minimum_version_id = 1,
2714    .fields = (VMStateField[]) {
2715        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2716        VMSTATE_END_OF_LIST()
2717    }
2718};
2719
2720static const VMStateDescription vmstate_virtio_ringsize = {
2721    .name = "virtio/ringsize",
2722    .version_id = 1,
2723    .minimum_version_id = 1,
2724    .needed = &virtio_ringsize_needed,
2725    .fields = (VMStateField[]) {
2726        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2727                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2728        VMSTATE_END_OF_LIST()
2729    }
2730};
2731
2732static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2733                           const VMStateField *field)
2734{
2735    VirtIODevice *vdev = pv;
2736    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2737    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2738
2739    if (!k->load_extra_state) {
2740        return -1;
2741    } else {
2742        return k->load_extra_state(qbus->parent, f);
2743    }
2744}
2745
2746static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2747                           const VMStateField *field, JSONWriter *vmdesc)
2748{
2749    VirtIODevice *vdev = pv;
2750    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2751    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2752
2753    k->save_extra_state(qbus->parent, f);
2754    return 0;
2755}
2756
2757static const VMStateInfo vmstate_info_extra_state = {
2758    .name = "virtqueue_extra_state",
2759    .get = get_extra_state,
2760    .put = put_extra_state,
2761};
2762
2763static const VMStateDescription vmstate_virtio_extra_state = {
2764    .name = "virtio/extra_state",
2765    .version_id = 1,
2766    .minimum_version_id = 1,
2767    .needed = &virtio_extra_state_needed,
2768    .fields = (VMStateField[]) {
2769        {
2770            .name         = "extra_state",
2771            .version_id   = 0,
2772            .field_exists = NULL,
2773            .size         = 0,
2774            .info         = &vmstate_info_extra_state,
2775            .flags        = VMS_SINGLE,
2776            .offset       = 0,
2777        },
2778        VMSTATE_END_OF_LIST()
2779    }
2780};
2781
2782static const VMStateDescription vmstate_virtio_device_endian = {
2783    .name = "virtio/device_endian",
2784    .version_id = 1,
2785    .minimum_version_id = 1,
2786    .needed = &virtio_device_endian_needed,
2787    .fields = (VMStateField[]) {
2788        VMSTATE_UINT8(device_endian, VirtIODevice),
2789        VMSTATE_END_OF_LIST()
2790    }
2791};
2792
2793static const VMStateDescription vmstate_virtio_64bit_features = {
2794    .name = "virtio/64bit_features",
2795    .version_id = 1,
2796    .minimum_version_id = 1,
2797    .needed = &virtio_64bit_features_needed,
2798    .fields = (VMStateField[]) {
2799        VMSTATE_UINT64(guest_features, VirtIODevice),
2800        VMSTATE_END_OF_LIST()
2801    }
2802};
2803
2804static const VMStateDescription vmstate_virtio_broken = {
2805    .name = "virtio/broken",
2806    .version_id = 1,
2807    .minimum_version_id = 1,
2808    .needed = &virtio_broken_needed,
2809    .fields = (VMStateField[]) {
2810        VMSTATE_BOOL(broken, VirtIODevice),
2811        VMSTATE_END_OF_LIST()
2812    }
2813};
2814
2815static const VMStateDescription vmstate_virtio_started = {
2816    .name = "virtio/started",
2817    .version_id = 1,
2818    .minimum_version_id = 1,
2819    .needed = &virtio_started_needed,
2820    .fields = (VMStateField[]) {
2821        VMSTATE_BOOL(started, VirtIODevice),
2822        VMSTATE_END_OF_LIST()
2823    }
2824};
2825
2826static const VMStateDescription vmstate_virtio_disabled = {
2827    .name = "virtio/disabled",
2828    .version_id = 1,
2829    .minimum_version_id = 1,
2830    .needed = &virtio_disabled_needed,
2831    .fields = (VMStateField[]) {
2832        VMSTATE_BOOL(disabled, VirtIODevice),
2833        VMSTATE_END_OF_LIST()
2834    }
2835};
2836
2837static const VMStateDescription vmstate_virtio = {
2838    .name = "virtio",
2839    .version_id = 1,
2840    .minimum_version_id = 1,
2841    .minimum_version_id_old = 1,
2842    .fields = (VMStateField[]) {
2843        VMSTATE_END_OF_LIST()
2844    },
2845    .subsections = (const VMStateDescription*[]) {
2846        &vmstate_virtio_device_endian,
2847        &vmstate_virtio_64bit_features,
2848        &vmstate_virtio_virtqueues,
2849        &vmstate_virtio_ringsize,
2850        &vmstate_virtio_broken,
2851        &vmstate_virtio_extra_state,
2852        &vmstate_virtio_started,
2853        &vmstate_virtio_packed_virtqueues,
2854        &vmstate_virtio_disabled,
2855        NULL
2856    }
2857};
2858
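/*
 * Save device state in the traditional ad-hoc format: transport config,
 * status/isr/queue_sel, the low 32 feature bits, the config space, then
 * per-queue ring size, alignment, desc address and last_avail_idx.
 * Device-specific state and the vmstate subsections follow.
 */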
2859int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2860{
2861    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2862    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2863    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2864    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2865    int i;
2866
2867    if (k->save_config) {
2868        k->save_config(qbus->parent, f);
2869    }
2870
2871    qemu_put_8s(f, &vdev->status);
2872    qemu_put_8s(f, &vdev->isr);
2873    qemu_put_be16s(f, &vdev->queue_sel);
2874    qemu_put_be32s(f, &guest_features_lo);
2875    qemu_put_be32(f, vdev->config_len);
2876    qemu_put_buffer(f, vdev->config, vdev->config_len);
2877
2878    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2879        if (vdev->vq[i].vring.num == 0)
2880            break;
2881    }
2882
2883    qemu_put_be32(f, i);
2884
2885    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2886        if (vdev->vq[i].vring.num == 0)
2887            break;
2888
2889        qemu_put_be32(f, vdev->vq[i].vring.num);
2890        if (k->has_variable_vring_alignment) {
2891            qemu_put_be32(f, vdev->vq[i].vring.align);
2892        }
2893        /*
2894         * Save desc now, the rest of the ring addresses are saved in
2895         * subsections for VIRTIO-1 devices.
2896         */
2897        qemu_put_be64(f, vdev->vq[i].vring.desc);
2898        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2899        if (k->save_queue) {
2900            k->save_queue(qbus->parent, i, f);
2901        }
2902    }
2903
2904    if (vdc->save != NULL) {
2905        vdc->save(vdev, f);
2906    }
2907
2908    if (vdc->vmsd) {
2909        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2910        if (ret) {
2911            return ret;
2912        }
2913    }
2914
2915    /* Subsections */
2916    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2917}
2918
2919/* A wrapper for use as a VMState .put function */
2920static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2921                              const VMStateField *field, JSONWriter *vmdesc)
2922{
2923    return virtio_save(VIRTIO_DEVICE(opaque), f);
2924}
2925
2926/* A wrapper for use as a VMState .get function */
2927static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2928                             const VMStateField *field)
2929{
2930    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2931    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2932
2933    return virtio_load(vdev, f, dc->vmsd->version_id);
2934}
2935
2936const VMStateInfo virtio_vmstate_info = {
2937    .name = "virtio",
2938    .get = virtio_device_get,
2939    .put = virtio_device_put,
2940};
2941
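/*
 * Apply a guest feature selection without checking negotiation state.
 * Bits not offered in host_features are masked off; returns -1 if the
 * guest asked for any such bit, 0 otherwise.
 */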
2942static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2943{
2944    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2945    bool bad = (val & ~(vdev->host_features)) != 0;
2946
2947    val &= vdev->host_features;
2948    if (k->set_features) {
2949        k->set_features(vdev, val);
2950    }
2951    vdev->guest_features = val;
2952    return bad ? -1 : 0;
2953}
2954
2955int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2956{
2957    int ret;
2958    /*
2959     * The driver must not attempt to set features after feature negotiation
2960     * has finished.
2961     */
2962    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2963        return -EINVAL;
2964    }
2965    ret = virtio_set_features_nocheck(vdev, val);
2966    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2967        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2968        int i;
2969        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2970            if (vdev->vq[i].vring.num != 0) {
2971                virtio_init_region_cache(vdev, i);
2972            }
2973        }
2974    }
2975    if (!ret) {
2976        if (!virtio_device_started(vdev, vdev->status) &&
2977            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2978            vdev->start_on_kick = true;
2979        }
2980    }
2981    return ret;
2982}
2983
2984size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
2985                                      uint64_t host_features)
2986{
2987    size_t config_size = 0;
2988    int i;
2989
2990    for (i = 0; feature_sizes[i].flags != 0; i++) {
2991        if (host_features & feature_sizes[i].flags) {
2992            config_size = MAX(feature_sizes[i].end, config_size);
2993        }
2994    }
2995
2996    return config_size;
2997}
2998
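/*
 * Counterpart of virtio_save().  Restores the fixed-format fields first,
 * then device-specific state and the vmstate subsections, and finally
 * re-derives per-queue state (region caches, used_idx, inuse) from the
 * rings, sanity-checking the indices the guest left behind.
 */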
2999int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3000{
3001    int i, ret;
3002    int32_t config_len;
3003    uint32_t num;
3004    uint32_t features;
3005    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3006    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3007    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3008
3009    /*
3010     * We poison the endianness to ensure it does not get used before
3011     * subsections have been loaded.
3012     */
3013    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3014
3015    if (k->load_config) {
3016        ret = k->load_config(qbus->parent, f);
3017        if (ret)
3018            return ret;
3019    }
3020
3021    qemu_get_8s(f, &vdev->status);
3022    qemu_get_8s(f, &vdev->isr);
3023    qemu_get_be16s(f, &vdev->queue_sel);
3024    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3025        return -1;
3026    }
3027    qemu_get_be32s(f, &features);
3028
3029    /*
3030     * Temporarily set guest_features low bits - needed by
3031     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
3032     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3033     *
3034     * Note: devices should always test host features in future - don't create
3035     * new dependencies like this.
3036     */
3037    vdev->guest_features = features;
3038
3039    config_len = qemu_get_be32(f);
3040
3041    /*
3042     * There are cases where the incoming config can be bigger or smaller
3043     * than what we have; so load what we have space for, and skip
3044     * any excess that's in the stream.
3045     */
3046    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3047
3048    while (config_len > vdev->config_len) {
3049        qemu_get_byte(f);
3050        config_len--;
3051    }
3052
3053    num = qemu_get_be32(f);
3054
3055    if (num > VIRTIO_QUEUE_MAX) {
3056        error_report("Invalid number of virtqueues: 0x%x", num);
3057        return -1;
3058    }
3059
3060    for (i = 0; i < num; i++) {
3061        vdev->vq[i].vring.num = qemu_get_be32(f);
3062        if (k->has_variable_vring_alignment) {
3063            vdev->vq[i].vring.align = qemu_get_be32(f);
3064        }
3065        vdev->vq[i].vring.desc = qemu_get_be64(f);
3066        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3067        vdev->vq[i].signalled_used_valid = false;
3068        vdev->vq[i].notification = true;
3069
3070        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3071            error_report("VQ %d address 0x0 "
3072                         "inconsistent with Host index 0x%x",
3073                         i, vdev->vq[i].last_avail_idx);
3074            return -1;
3075        }
3076        if (k->load_queue) {
3077            ret = k->load_queue(qbus->parent, i, f);
3078            if (ret)
3079                return ret;
3080        }
3081    }
3082
3083    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3084
3085    if (vdc->load != NULL) {
3086        ret = vdc->load(vdev, f, version_id);
3087        if (ret) {
3088            return ret;
3089        }
3090    }
3091
3092    if (vdc->vmsd) {
3093        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3094        if (ret) {
3095            return ret;
3096        }
3097    }
3098
3099    /* Subsections */
3100    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3101    if (ret) {
3102        return ret;
3103    }
3104
3105    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3106        vdev->device_endian = virtio_default_endian();
3107    }
3108
3109    if (virtio_64bit_features_needed(vdev)) {
3110        /*
3111         * Subsection load filled vdev->guest_features.  Run them
3112         * through virtio_set_features to sanity-check them against
3113         * host_features.
3114         */
3115        uint64_t features64 = vdev->guest_features;
3116        if (virtio_set_features_nocheck(vdev, features64) < 0) {
3117            error_report("Features 0x%" PRIx64 " unsupported. "
3118                         "Allowed features: 0x%" PRIx64,
3119                         features64, vdev->host_features);
3120            return -1;
3121        }
3122    } else {
3123        if (virtio_set_features_nocheck(vdev, features) < 0) {
3124            error_report("Features 0x%x unsupported. "
3125                         "Allowed features: 0x%" PRIx64,
3126                         features, vdev->host_features);
3127            return -1;
3128        }
3129    }
3130
3131    if (!virtio_device_started(vdev, vdev->status) &&
3132        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3133        vdev->start_on_kick = true;
3134    }
3135
3136    RCU_READ_LOCK_GUARD();
3137    for (i = 0; i < num; i++) {
3138        if (vdev->vq[i].vring.desc) {
3139            uint16_t nheads;
3140
3141            /*
3142             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3143             * only the region cache needs to be set up.  Legacy devices need
3144             * to calculate used and avail ring addresses based on the desc
3145             * address.
3146             */
3147            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3148                virtio_init_region_cache(vdev, i);
3149            } else {
3150                virtio_queue_update_rings(vdev, i);
3151            }
3152
3153            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3154                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3155                vdev->vq[i].shadow_avail_wrap_counter =
3156                                        vdev->vq[i].last_avail_wrap_counter;
3157                continue;
3158            }
3159
3160            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3161            /* Check it isn't doing strange things with descriptor numbers. */
3162            if (nheads > vdev->vq[i].vring.num) {
3163                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3164                             "inconsistent with Host index 0x%x: delta 0x%x",
3165                             i, vdev->vq[i].vring.num,
3166                             vring_avail_idx(&vdev->vq[i]),
3167                             vdev->vq[i].last_avail_idx, nheads);
3168                vdev->vq[i].used_idx = 0;
3169                vdev->vq[i].shadow_avail_idx = 0;
3170                vdev->vq[i].inuse = 0;
3171                continue;
3172            }
3173            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3174            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3175
3176            /*
3177             * Some devices migrate VirtQueueElements that have been popped
3178             * from the avail ring but not yet returned to the used ring.
3179             * Since max ring size < UINT16_MAX it's safe to use modulo
3180             * UINT16_MAX + 1 subtraction.
3181             */
3182            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3183                                vdev->vq[i].used_idx);
3184            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3185                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3186                             "used_idx 0x%x",
3187                             i, vdev->vq[i].vring.num,
3188                             vdev->vq[i].last_avail_idx,
3189                             vdev->vq[i].used_idx);
3190                return -1;
3191            }
3192        }
3193    }
3194
3195    if (vdc->post_load) {
3196        ret = vdc->post_load(vdev);
3197        if (ret) {
3198            return ret;
3199        }
3200    }
3201
3202    return 0;
3203}
3204
3205void virtio_cleanup(VirtIODevice *vdev)
3206{
3207    qemu_del_vm_change_state_handler(vdev->vmstate);
3208}
3209
3210static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3211{
3212    VirtIODevice *vdev = opaque;
3213    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3214    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3215    bool backend_run = running && virtio_device_started(vdev, vdev->status);
3216    vdev->vm_running = running;
3217
3218    if (backend_run) {
3219        virtio_set_status(vdev, vdev->status);
3220    }
3221
3222    if (k->vmstate_change) {
3223        k->vmstate_change(qbus->parent, backend_run);
3224    }
3225
3226    if (!backend_run) {
3227        virtio_set_status(vdev, vdev->status);
3228    }
3229}
3230
3231void virtio_instance_init_common(Object *proxy_obj, void *data,
3232                                 size_t vdev_size, const char *vdev_name)
3233{
3234    DeviceState *vdev = data;
3235
3236    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3237                                       vdev_size, vdev_name, &error_abort,
3238                                       NULL);
3239    qdev_alias_all_properties(vdev, proxy_obj);
3240}
3241
3242void virtio_init(VirtIODevice *vdev, const char *name,
3243                 uint16_t device_id, size_t config_size)
3244{
3245    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3246    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3247    int i;
3248    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3249
3250    if (nvectors) {
3251        vdev->vector_queues =
3252            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3253    }
3254
3255    vdev->start_on_kick = false;
3256    vdev->started = false;
3257    vdev->device_id = device_id;
3258    vdev->status = 0;
3259    qatomic_set(&vdev->isr, 0);
3260    vdev->queue_sel = 0;
3261    vdev->config_vector = VIRTIO_NO_VECTOR;
3262    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3263    vdev->vm_running = runstate_is_running();
3264    vdev->broken = false;
3265    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3266        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3267        vdev->vq[i].vdev = vdev;
3268        vdev->vq[i].queue_index = i;
3269        vdev->vq[i].host_notifier_enabled = false;
3270    }
3271
3272    vdev->name = name;
3273    vdev->config_len = config_size;
3274    if (vdev->config_len) {
3275        vdev->config = g_malloc0(config_size);
3276    } else {
3277        vdev->config = NULL;
3278    }
3279    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3280            virtio_vmstate_change, vdev);
3281    vdev->device_endian = virtio_default_endian();
3282    vdev->use_guest_notifier_mask = true;
3283}
3284
3285/*
3286 * Only devices that have already been around prior to defining the virtio
3287 * standard support legacy mode; this includes devices not specified in the
3288 * standard. All newer devices conform to the virtio standard only.
3289 */
3290bool virtio_legacy_allowed(VirtIODevice *vdev)
3291{
3292    switch (vdev->device_id) {
3293    case VIRTIO_ID_NET:
3294    case VIRTIO_ID_BLOCK:
3295    case VIRTIO_ID_CONSOLE:
3296    case VIRTIO_ID_RNG:
3297    case VIRTIO_ID_BALLOON:
3298    case VIRTIO_ID_RPMSG:
3299    case VIRTIO_ID_SCSI:
3300    case VIRTIO_ID_9P:
3301    case VIRTIO_ID_RPROC_SERIAL:
3302    case VIRTIO_ID_CAIF:
3303        return true;
3304    default:
3305        return false;
3306    }
3307}
3308
3309bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3310{
3311    return vdev->disable_legacy_check;
3312}
3313
3314hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3315{
3316    return vdev->vq[n].vring.desc;
3317}
3318
3319bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3320{
3321    return virtio_queue_get_desc_addr(vdev, n) != 0;
3322}
3323
3324bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3325{
3326    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3327    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3328
3329    if (k->queue_enabled) {
3330        return k->queue_enabled(qbus->parent, n);
3331    }
3332    return virtio_queue_enabled_legacy(vdev, n);
3333}
3334
3335hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3336{
3337    return vdev->vq[n].vring.avail;
3338}
3339
3340hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3341{
3342    return vdev->vq[n].vring.used;
3343}
3344
3345hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3346{
3347    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3348}
3349
3350hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3351{
3352    int s;
3353
3354    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3355        return sizeof(struct VRingPackedDescEvent);
3356    }
3357
3358    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3359    return offsetof(VRingAvail, ring) +
3360        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3361}
3362
3363hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3364{
3365    int s;
3366
3367    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3368        return sizeof(struct VRingPackedDescEvent);
3369    }
3370
3371    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3372    return offsetof(VRingUsed, ring) +
3373        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3374}
3375
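/*
 * For packed rings the "last avail" state is reported as a 32-bit value:
 * bits 0-14 are last_avail_idx with its wrap counter in bit 15, and
 * bits 16-30 are used_idx with its wrap counter in bit 31.  The setter
 * below decodes the same layout.
 */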
3376static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3377                                                           int n)
3378{
3379    unsigned int avail, used;
3380
3381    avail = vdev->vq[n].last_avail_idx;
3382    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3383
3384    used = vdev->vq[n].used_idx;
3385    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3386
3387    return avail | used << 16;
3388}
3389
3390static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3391                                                      int n)
3392{
3393    return vdev->vq[n].last_avail_idx;
3394}
3395
3396unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3397{
3398    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3399        return virtio_queue_packed_get_last_avail_idx(vdev, n);
3400    } else {
3401        return virtio_queue_split_get_last_avail_idx(vdev, n);
3402    }
3403}
3404
3405static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3406                                                   int n, unsigned int idx)
3407{
3408    struct VirtQueue *vq = &vdev->vq[n];
3409
3410    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3411    vq->last_avail_wrap_counter =
3412        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3413    idx >>= 16;
3414    vq->used_idx = idx & 0x7fff; /* bit 15 is the wrap counter */
3415    vq->used_wrap_counter = !!(idx & 0x8000);
3416}
3417
3418static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3419                                                  int n, unsigned int idx)
3420{
3421    vdev->vq[n].last_avail_idx = idx;
3422    vdev->vq[n].shadow_avail_idx = idx;
3423}
3424
3425void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3426                                     unsigned int idx)
3427{
3428    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3429        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3430    } else {
3431        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3432    }
3433}
3434
3435static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3436                                                       int n)
3437{
3438    /* We don't have a reference like avail idx in shared memory */
3439    return;
3440}
3441
3442static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3443                                                      int n)
3444{
3445    RCU_READ_LOCK_GUARD();
3446    if (vdev->vq[n].vring.desc) {
3447        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3448        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3449    }
3450}
3451
3452void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3453{
3454    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3455        virtio_queue_packed_restore_last_avail_idx(vdev, n);
3456    } else {
3457        virtio_queue_split_restore_last_avail_idx(vdev, n);
3458    }
3459}
3460
3461static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3462{
3463    /* used idx was updated through set_last_avail_idx() */
3464    return;
3465}
3466
3467static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
3468{
3469    RCU_READ_LOCK_GUARD();
3470    if (vdev->vq[n].vring.desc) {
3471        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3472    }
3473}
3474
3475void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3476{
3477    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3478        return virtio_queue_packed_update_used_idx(vdev, n);
3479    } else {
3480        return virtio_queue_split_update_used_idx(vdev, n);
3481    }
3482}
3483
3484void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3485{
3486    vdev->vq[n].signalled_used_valid = false;
3487}
3488
3489VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3490{
3491    return vdev->vq + n;
3492}
3493
3494uint16_t virtio_get_queue_index(VirtQueue *vq)
3495{
3496    return vq->queue_index;
3497}
3498
3499static void virtio_queue_guest_notifier_read(EventNotifier *n)
3500{
3501    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3502    if (event_notifier_test_and_clear(n)) {
3503        virtio_irq(vq);
3504    }
3505}
3506
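/*
 * With an irqfd the interrupt is injected by the kernel straight from the
 * eventfd, so no userspace read handler is installed; without one, the
 * handler above drains the notifier and raises the interrupt itself.
 */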
3507void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3508                                                bool with_irqfd)
3509{
3510    if (assign && !with_irqfd) {
3511        event_notifier_set_handler(&vq->guest_notifier,
3512                                   virtio_queue_guest_notifier_read);
3513    } else {
3514        event_notifier_set_handler(&vq->guest_notifier, NULL);
3515    }
3516    if (!assign) {
3517        /* Test and clear notifier before closing it,
3518         * in case poll callback didn't have time to run. */
3519        virtio_queue_guest_notifier_read(&vq->guest_notifier);
3520    }
3521}
3522
3523EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3524{
3525    return &vq->guest_notifier;
3526}
3527
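/*
 * Adaptive polling for the AioContext host notifier: while the event loop
 * is in its polling phase, poll_begin suppresses guest->host notifications
 * (saving the guest the kick) and the poll callback inspects the ring
 * directly; poll_end re-enables notifications before the loop goes back to
 * sleeping on the eventfd.
 */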
3528static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3529{
3530    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3531    if (event_notifier_test_and_clear(n)) {
3532        virtio_queue_notify_aio_vq(vq);
3533    }
3534}
3535
3536static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3537{
3538    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3539
3540    virtio_queue_set_notification(vq, 0);
3541}
3542
3543static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3544{
3545    EventNotifier *n = opaque;
3546    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3547
3548    if (!vq->vring.desc || virtio_queue_empty(vq)) {
3549        return false;
3550    }
3551
3552    return virtio_queue_notify_aio_vq(vq);
3553}
3554
3555static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3556{
3557    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3558
3559    /* Caller polls once more after this to catch requests that race with us */
3560    virtio_queue_set_notification(vq, 1);
3561}
3562
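/*
 * Attach or detach a queue's host notifier handlers on an AioContext.  A
 * sketch of how a dataplane device might call this (the context and
 * handler names are illustrative, not taken from a specific device):
 *
 *     aio_context_acquire(ctx);
 *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, handle_vq_output);
 *     aio_context_release(ctx);
 *
 * Passing a NULL handler on the same context detaches the queue again.
 */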
3563void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3564                                                VirtIOHandleAIOOutput handle_output)
3565{
3566    if (handle_output) {
3567        vq->handle_aio_output = handle_output;
3568        aio_set_event_notifier(ctx, &vq->host_notifier, true,
3569                               virtio_queue_host_notifier_aio_read,
3570                               virtio_queue_host_notifier_aio_poll);
3571        aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3572                                    virtio_queue_host_notifier_aio_poll_begin,
3573                                    virtio_queue_host_notifier_aio_poll_end);
3574    } else {
3575        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3576        /* Test and clear notifier after disabling event,
3577         * in case poll callback didn't have time to run. */
3578        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3579        vq->handle_aio_output = NULL;
3580    }
3581}
3582
3583void virtio_queue_host_notifier_read(EventNotifier *n)
3584{
3585    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3586    if (event_notifier_test_and_clear(n)) {
3587        virtio_queue_notify_vq(vq);
3588    }
3589}
3590
3591EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3592{
3593    return &vq->host_notifier;
3594}
3595
3596void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3597{
3598    vq->host_notifier_enabled = enabled;
3599}
3600
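/*
 * Hand a memory-region-backed host notifier to the transport, as used for
 * example by vhost-user backends that map the notifier region into the
 * guest.  Returns -1 if the transport does not implement
 * set_host_notifier_mr.
 */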
3601int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3602                                      MemoryRegion *mr, bool assign)
3603{
3604    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3605    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3606
3607    if (k->set_host_notifier_mr) {
3608        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3609    }
3610
3611    return -1;
3612}
3613
3614void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3615{
3616    g_free(vdev->bus_name);
3617    vdev->bus_name = g_strdup(bus_name);
3618}
3619
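/*
 * Report an error and mark the device broken.  For VIRTIO 1.0+ devices
 * this also sets NEEDS_RESET in the status and raises a config interrupt
 * so the guest can notice and reset the device.  Typical call (the format
 * string is purely illustrative):
 *
 *     virtio_error(vdev, "descriptor chain exceeds ring size %u", num);
 */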
3620void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3621{
3622    va_list ap;
3623
3624    va_start(ap, fmt);
3625    error_vreport(fmt, ap);
3626    va_end(ap);
3627
3628    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3629        vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
3630        virtio_notify_config(vdev);
3631    }
3632
3633    vdev->broken = true;
3634}
3635
3636static void virtio_memory_listener_commit(MemoryListener *listener)
3637{
3638    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3639    int i;
3640
3641    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3642        if (vdev->vq[i].vring.num == 0) {
3643            break;
3644        }
3645        virtio_init_region_cache(vdev, i);
3646    }
3647}
3648
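/*
 * Realize ordering matters here: the device-specific realize runs first,
 * then the device is plugged into the virtio bus so the transport can set
 * itself up, and only then is the memory listener registered that keeps
 * the vring region caches in sync with guest memory layout changes.
 */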
3649static void virtio_device_realize(DeviceState *dev, Error **errp)
3650{
3651    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3652    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3653    Error *err = NULL;
3654
3655    /* Devices should either use vmsd or the load/save methods */
3656    assert(!vdc->vmsd || !vdc->load);
3657
3658    if (vdc->realize != NULL) {
3659        vdc->realize(dev, &err);
3660        if (err != NULL) {
3661            error_propagate(errp, err);
3662            return;
3663        }
3664    }
3665
3666    virtio_bus_device_plugged(vdev, &err);
3667    if (err != NULL) {
3668        error_propagate(errp, err);
3669        vdc->unrealize(dev);
3670        return;
3671    }
3672
3673    vdev->listener.commit = virtio_memory_listener_commit;
3674    memory_listener_register(&vdev->listener, vdev->dma_as);
3675}
3676
3677static void virtio_device_unrealize(DeviceState *dev)
3678{
3679    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3680    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3681
3682    memory_listener_unregister(&vdev->listener);
3683    virtio_bus_device_unplugged(vdev);
3684
3685    if (vdc->unrealize != NULL) {
3686        vdc->unrealize(dev);
3687    }
3688
3689    g_free(vdev->bus_name);
3690    vdev->bus_name = NULL;
3691}
3692
3693static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3694{
3695    int i;
3696    if (!vdev->vq) {
3697        return;
3698    }
3699
3700    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3701        if (vdev->vq[i].vring.num == 0) {
3702            break;
3703        }
3704        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3705    }
3706    g_free(vdev->vq);
3707}
3708
3709static void virtio_device_instance_finalize(Object *obj)
3710{
3711    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3712
3713    virtio_device_free_virtqueues(vdev);
3714
3715    g_free(vdev->config);
3716    g_free(vdev->vector_queues);
3717}
3718
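/*
 * Properties shared by every virtio device.  "use-started" and
 * "use-disabled-flag" appear to exist so that older machine types can
 * switch off the newer started/disabled state tracking for cross-version
 * migration compatibility.
 */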
3719static Property virtio_properties[] = {
3720    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3721    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3722    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3723    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3724                     disable_legacy_check, false),
3725    DEFINE_PROP_END_OF_LIST(),
3726};
3727
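/*
 * The error unwind below is two-phase on purpose: handlers and notifier
 * assignments are undone inside the still-open memory region transaction,
 * but the eventfds are only cleaned up in a second loop after the commit,
 * since the transaction expects them to still be open when it commits.
 */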
3728static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3729{
3730    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3731    int i, n, r, err;
3732
3733    /*
3734     * Batch all the host notifiers in a single transaction to avoid
3735     * quadratic time complexity in address_space_update_ioeventfds().
3736     */
3737    memory_region_transaction_begin();
3738    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3739        VirtQueue *vq = &vdev->vq[n];
3740        if (!virtio_queue_get_num(vdev, n)) {
3741            continue;
3742        }
3743        r = virtio_bus_set_host_notifier(qbus, n, true);
3744        if (r < 0) {
3745            err = r;
3746            goto assign_error;
3747        }
3748        event_notifier_set_handler(&vq->host_notifier,
3749                                   virtio_queue_host_notifier_read);
3750    }
3751
3752    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3753        /* Kick right away to begin processing requests already in vring */
3754        VirtQueue *vq = &vdev->vq[n];
3755        if (!vq->vring.num) {
3756            continue;
3757        }
3758        event_notifier_set(&vq->host_notifier);
3759    }
3760    memory_region_transaction_commit();
3761    return 0;
3762
3763assign_error:
3764    i = n; /* save n for a second iteration after transaction is committed. */
3765    while (--n >= 0) {
3766        VirtQueue *vq = &vdev->vq[n];
3767        if (!virtio_queue_get_num(vdev, n)) {
3768            continue;
3769        }
3770
3771        event_notifier_set_handler(&vq->host_notifier, NULL);
3772        r = virtio_bus_set_host_notifier(qbus, n, false);
3773        assert(r >= 0);
3774    }
3775    /*
3776     * The transaction expects the ioeventfds to be open when it
3777     * commits. Do it now, before the cleanup loop.
3778     */
3779    memory_region_transaction_commit();
3780
3781    while (--i >= 0) {
3782        if (!virtio_queue_get_num(vdev, i)) {
3783            continue;
3784        }
3785        virtio_bus_cleanup_host_notifier(qbus, i);
3786    }
3787    return err;
3788}
3789
3790int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3791{
3792    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3793    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3794
3795    return virtio_bus_start_ioeventfd(vbus);
3796}
3797
3798static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3799{
3800    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3801    int n, r;
3802
3803    /*
3804     * Batch all the host notifiers in a single transaction to avoid
3805     * quadratic time complexity in address_space_update_ioeventfds().
3806     */
3807    memory_region_transaction_begin();
3808    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3809        VirtQueue *vq = &vdev->vq[n];
3810
3811        if (!virtio_queue_get_num(vdev, n)) {
3812            continue;
3813        }
3814        event_notifier_set_handler(&vq->host_notifier, NULL);
3815        r = virtio_bus_set_host_notifier(qbus, n, false);
3816        assert(r >= 0);
3817    }
3818    /*
3819     * The transaction expects the ioeventfds to be open when it
3820     * commits. Do it now, before the cleanup loop.
3821     */
3822    memory_region_transaction_commit();
3823
3824    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3825        if (!virtio_queue_get_num(vdev, n)) {
3826            continue;
3827        }
3828        virtio_bus_cleanup_host_notifier(qbus, n);
3829    }
3830}
3831
3832int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3833{
3834    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3835    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3836
3837    return virtio_bus_grab_ioeventfd(vbus);
3838}
3839
3840void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3841{
3842    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3843    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3844
3845    virtio_bus_release_ioeventfd(vbus);
3846}
3847
3848static void virtio_device_class_init(ObjectClass *klass, void *data)
3849{
3850    /* Set the common virtio class defaults here. */
3851    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3852    DeviceClass *dc = DEVICE_CLASS(klass);
3853
3854    dc->realize = virtio_device_realize;
3855    dc->unrealize = virtio_device_unrealize;
3856    dc->bus_type = TYPE_VIRTIO_BUS;
3857    device_class_set_props(dc, virtio_properties);
3858    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3859    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3860
3861    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3862}
3863
3864bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3865{
3866    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3867    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3868
3869    return virtio_bus_ioeventfd_enabled(vbus);
3870}
3871
3872static const TypeInfo virtio_device_info = {
3873    .name = TYPE_VIRTIO_DEVICE,
3874    .parent = TYPE_DEVICE,
3875    .instance_size = sizeof(VirtIODevice),
3876    .class_init = virtio_device_class_init,
3877    .instance_finalize = virtio_device_instance_finalize,
3878    .abstract = true,
3879    .class_size = sizeof(VirtioDeviceClass),
3880};
3881
3882static void virtio_register_types(void)
3883{
3884    type_register_static(&virtio_device_info);
3885}
3886
3887type_init(virtio_register_types)
3888