qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "cpu.h"
  17#include "trace.h"
  18#include "exec/address-spaces.h"
  19#include "qemu/error-report.h"
  20#include "qemu/main-loop.h"
  21#include "qemu/module.h"
  22#include "hw/virtio/virtio.h"
  23#include "migration/qemu-file-types.h"
  24#include "qemu/atomic.h"
  25#include "hw/virtio/virtio-bus.h"
  26#include "hw/qdev-properties.h"
  27#include "hw/virtio/virtio-access.h"
  28#include "sysemu/dma.h"
  29#include "sysemu/runstate.h"
  30
  31/*
  32 * The alignment to use between consumer and producer parts of vring.
  33 * x86 pagesize again. This is the default, used by transports like PCI
  34 * which don't provide a means for the guest to tell the host the alignment.
  35 */
  36#define VIRTIO_PCI_VRING_ALIGN         4096
  37
  38typedef struct VRingDesc
  39{
  40    uint64_t addr;
  41    uint32_t len;
  42    uint16_t flags;
  43    uint16_t next;
  44} VRingDesc;
  45
  46typedef struct VRingPackedDesc {
  47    uint64_t addr;
  48    uint32_t len;
  49    uint16_t id;
  50    uint16_t flags;
  51} VRingPackedDesc;
  52
  53typedef struct VRingAvail
  54{
  55    uint16_t flags;
  56    uint16_t idx;
  57    uint16_t ring[0];
  58} VRingAvail;
  59
  60typedef struct VRingUsedElem
  61{
  62    uint32_t id;
  63    uint32_t len;
  64} VRingUsedElem;
  65
  66typedef struct VRingUsed
  67{
  68    uint16_t flags;
  69    uint16_t idx;
  70    VRingUsedElem ring[0];
  71} VRingUsed;
  72
  73typedef struct VRingMemoryRegionCaches {
  74    struct rcu_head rcu;
  75    MemoryRegionCache desc;
  76    MemoryRegionCache avail;
  77    MemoryRegionCache used;
  78} VRingMemoryRegionCaches;
  79
  80typedef struct VRing
  81{
  82    unsigned int num;
  83    unsigned int num_default;
  84    unsigned int align;
  85    hwaddr desc;
  86    hwaddr avail;
  87    hwaddr used;
  88    VRingMemoryRegionCaches *caches;
  89} VRing;
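
     /*
      * Note: desc, avail and used hold the guest-physical addresses of the
      * ring areas set up by the guest; the VRingMemoryRegionCaches pointer
      * caches host mappings of those same regions and is replaced under
      * RCU (see virtio_init_region_cache()) whenever the guest
      * reconfigures the queue.
      */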
  90
  91typedef struct VRingPackedDescEvent {
  92    uint16_t off_wrap;
  93    uint16_t flags;
   94} VRingPackedDescEvent;
  95
  96struct VirtQueue
  97{
  98    VRing vring;
  99    VirtQueueElement *used_elems;
 100
 101    /* Next head to pop */
 102    uint16_t last_avail_idx;
 103    bool last_avail_wrap_counter;
 104
 105    /* Last avail_idx read from VQ. */
 106    uint16_t shadow_avail_idx;
 107    bool shadow_avail_wrap_counter;
 108
 109    uint16_t used_idx;
 110    bool used_wrap_counter;
 111
 112    /* Last used index value we have signalled on */
 113    uint16_t signalled_used;
 114
  115    /* Whether signalled_used is valid */
 116    bool signalled_used_valid;
 117
 118    /* Notification enabled? */
 119    bool notification;
 120
 121    uint16_t queue_index;
 122
 123    unsigned int inuse;
 124
 125    uint16_t vector;
 126    VirtIOHandleOutput handle_output;
 127    VirtIOHandleAIOOutput handle_aio_output;
 128    VirtIODevice *vdev;
 129    EventNotifier guest_notifier;
 130    EventNotifier host_notifier;
 131    bool host_notifier_enabled;
 132    QLIST_ENTRY(VirtQueue) node;
 133};
 134
 135static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
 136{
 137    if (!caches) {
 138        return;
 139    }
 140
 141    address_space_cache_destroy(&caches->desc);
 142    address_space_cache_destroy(&caches->avail);
 143    address_space_cache_destroy(&caches->used);
 144    g_free(caches);
 145}
 146
 147static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
 148{
 149    VRingMemoryRegionCaches *caches;
 150
 151    caches = atomic_read(&vq->vring.caches);
 152    atomic_rcu_set(&vq->vring.caches, NULL);
 153    if (caches) {
 154        call_rcu(caches, virtio_free_region_cache, rcu);
 155    }
 156}
 157
 158static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 159{
 160    VirtQueue *vq = &vdev->vq[n];
 161    VRingMemoryRegionCaches *old = vq->vring.caches;
 162    VRingMemoryRegionCaches *new = NULL;
 163    hwaddr addr, size;
 164    int64_t len;
 165    bool packed;
 166
 167
 168    addr = vq->vring.desc;
 169    if (!addr) {
 170        goto out_no_cache;
 171    }
 172    new = g_new0(VRingMemoryRegionCaches, 1);
 173    size = virtio_queue_get_desc_size(vdev, n);
 174    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
 175                                   true : false;
 176    len = address_space_cache_init(&new->desc, vdev->dma_as,
 177                                   addr, size, packed);
 178    if (len < size) {
 179        virtio_error(vdev, "Cannot map desc");
 180        goto err_desc;
 181    }
 182
 183    size = virtio_queue_get_used_size(vdev, n);
 184    len = address_space_cache_init(&new->used, vdev->dma_as,
 185                                   vq->vring.used, size, true);
 186    if (len < size) {
 187        virtio_error(vdev, "Cannot map used");
 188        goto err_used;
 189    }
 190
 191    size = virtio_queue_get_avail_size(vdev, n);
 192    len = address_space_cache_init(&new->avail, vdev->dma_as,
 193                                   vq->vring.avail, size, false);
 194    if (len < size) {
 195        virtio_error(vdev, "Cannot map avail");
 196        goto err_avail;
 197    }
 198
 199    atomic_rcu_set(&vq->vring.caches, new);
 200    if (old) {
 201        call_rcu(old, virtio_free_region_cache, rcu);
 202    }
 203    return;
 204
 205err_avail:
 206    address_space_cache_destroy(&new->avail);
 207err_used:
 208    address_space_cache_destroy(&new->used);
 209err_desc:
 210    address_space_cache_destroy(&new->desc);
 211out_no_cache:
 212    g_free(new);
 213    virtio_virtqueue_reset_region_cache(vq);
 214}
 215
 216/* virt queue functions */
 217void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 218{
 219    VRing *vring = &vdev->vq[n].vring;
 220
 221    if (!vring->num || !vring->desc || !vring->align) {
 222        /* not yet setup -> nothing to do */
 223        return;
 224    }
 225    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 226    vring->used = vring_align(vring->avail +
 227                              offsetof(VRingAvail, ring[vring->num]),
 228                              vring->align);
 229    virtio_init_region_cache(vdev, n);
 230}
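
     /*
      * An illustrative worked example of the split-ring layout computed
      * above (not taken from this file): assume a queue of num = 256
      * entries with the default 4096-byte alignment.  The descriptor table
      * occupies 256 * sizeof(VRingDesc) = 4096 bytes, so vring->avail =
      * vring->desc + 4096.  The avail ring needs
      * offsetof(VRingAvail, ring[256]) = 4 + 2 * 256 = 516 bytes, ending at
      * desc + 4612; vring_align() rounds that up to the next 4096-byte
      * boundary, so the used ring starts at vring->desc + 8192.
      */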
 231
 232/* Called within rcu_read_lock().  */
 233static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 234                                  MemoryRegionCache *cache, int i)
 235{
 236    address_space_read_cached(cache, i * sizeof(VRingDesc),
 237                              desc, sizeof(VRingDesc));
 238    virtio_tswap64s(vdev, &desc->addr);
 239    virtio_tswap32s(vdev, &desc->len);
 240    virtio_tswap16s(vdev, &desc->flags);
 241    virtio_tswap16s(vdev, &desc->next);
 242}
 243
 244static void vring_packed_event_read(VirtIODevice *vdev,
 245                                    MemoryRegionCache *cache,
 246                                    VRingPackedDescEvent *e)
 247{
 248    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
 249    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 250
 251    address_space_read_cached(cache, off_flags, &e->flags,
 252                              sizeof(e->flags));
 253    /* Make sure flags is seen before off_wrap */
 254    smp_rmb();
 255    address_space_read_cached(cache, off_off, &e->off_wrap,
 256                              sizeof(e->off_wrap));
 257    virtio_tswap16s(vdev, &e->off_wrap);
 258    virtio_tswap16s(vdev, &e->flags);
 259}
 260
 261static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 262                                        MemoryRegionCache *cache,
 263                                        uint16_t off_wrap)
 264{
 265    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 266
 267    virtio_tswap16s(vdev, &off_wrap);
 268    address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
 269    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 270}
 271
 272static void vring_packed_flags_write(VirtIODevice *vdev,
 273                                     MemoryRegionCache *cache, uint16_t flags)
 274{
 275    hwaddr off = offsetof(VRingPackedDescEvent, flags);
 276
 277    virtio_tswap16s(vdev, &flags);
 278    address_space_write_cached(cache, off, &flags, sizeof(flags));
 279    address_space_cache_invalidate(cache, off, sizeof(flags));
 280}
 281
 282/* Called within rcu_read_lock().  */
 283static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 284{
 285    VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
 286    assert(caches != NULL);
 287    return caches;
 288}
 289/* Called within rcu_read_lock().  */
 290static inline uint16_t vring_avail_flags(VirtQueue *vq)
 291{
 292    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 293    hwaddr pa = offsetof(VRingAvail, flags);
 294    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 295}
 296
 297/* Called within rcu_read_lock().  */
 298static inline uint16_t vring_avail_idx(VirtQueue *vq)
 299{
 300    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 301    hwaddr pa = offsetof(VRingAvail, idx);
 302    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 303    return vq->shadow_avail_idx;
 304}
 305
 306/* Called within rcu_read_lock().  */
 307static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 308{
 309    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 310    hwaddr pa = offsetof(VRingAvail, ring[i]);
 311    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 312}
 313
 314/* Called within rcu_read_lock().  */
 315static inline uint16_t vring_get_used_event(VirtQueue *vq)
 316{
 317    return vring_avail_ring(vq, vq->vring.num);
 318}
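
     /*
      * Note: with VIRTIO_RING_F_EVENT_IDX the guest publishes used_event in
      * the two bytes immediately after the avail ring, i.e. at
      * avail->ring[num], which is why the helper above reads "slot"
      * vq->vring.num of the avail ring.  The mirror image, avail_event,
      * lives after the used ring and is written by vring_set_avail_event().
      */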
 319
 320/* Called within rcu_read_lock().  */
 321static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 322                                    int i)
 323{
 324    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 325    hwaddr pa = offsetof(VRingUsed, ring[i]);
 326    virtio_tswap32s(vq->vdev, &uelem->id);
 327    virtio_tswap32s(vq->vdev, &uelem->len);
 328    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
 329    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
 330}
 331
 332/* Called within rcu_read_lock().  */
 333static uint16_t vring_used_idx(VirtQueue *vq)
 334{
 335    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 336    hwaddr pa = offsetof(VRingUsed, idx);
 337    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 338}
 339
 340/* Called within rcu_read_lock().  */
 341static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 342{
 343    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 344    hwaddr pa = offsetof(VRingUsed, idx);
 345    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 346    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 347    vq->used_idx = val;
 348}
 349
 350/* Called within rcu_read_lock().  */
 351static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 352{
 353    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 354    VirtIODevice *vdev = vq->vdev;
 355    hwaddr pa = offsetof(VRingUsed, flags);
 356    uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 357
 358    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
 359    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 360}
 361
 362/* Called within rcu_read_lock().  */
 363static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 364{
 365    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 366    VirtIODevice *vdev = vq->vdev;
 367    hwaddr pa = offsetof(VRingUsed, flags);
 368    uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 369
 370    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
 371    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 372}
 373
 374/* Called within rcu_read_lock().  */
 375static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 376{
 377    VRingMemoryRegionCaches *caches;
 378    hwaddr pa;
 379    if (!vq->notification) {
 380        return;
 381    }
 382
 383    caches = vring_get_region_caches(vq);
 384    pa = offsetof(VRingUsed, ring[vq->vring.num]);
 385    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 386    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 387}
 388
 389static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
 390{
 391    RCU_READ_LOCK_GUARD();
 392
 393    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 394        vring_set_avail_event(vq, vring_avail_idx(vq));
 395    } else if (enable) {
 396        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 397    } else {
 398        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 399    }
 400    if (enable) {
 401        /* Expose avail event/used flags before caller checks the avail idx. */
 402        smp_mb();
 403    }
 404}
 405
 406static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
 407{
 408    uint16_t off_wrap;
 409    VRingPackedDescEvent e;
 410    VRingMemoryRegionCaches *caches;
 411
 412    RCU_READ_LOCK_GUARD();
  413    caches = vring_get_region_caches(vq);
 414    vring_packed_event_read(vq->vdev, &caches->used, &e);
 415
 416    if (!enable) {
 417        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
 418    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 419        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
 420        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
  421        /* Make sure off_wrap is written before flags */
 422        smp_wmb();
 423        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
 424    } else {
 425        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
 426    }
 427
 428    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
 429    if (enable) {
 430        /* Expose avail event/used flags before caller checks the avail idx. */
 431        smp_mb();
 432    }
 433}
 434
 435void virtio_queue_set_notification(VirtQueue *vq, int enable)
 436{
 437    vq->notification = enable;
 438
 439    if (!vq->vring.desc) {
 440        return;
 441    }
 442
 443    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 444        virtio_queue_packed_set_notification(vq, enable);
 445    } else {
 446        virtio_queue_split_set_notification(vq, enable);
 447    }
 448}
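
     /*
      * Typical use (an illustrative sketch only, not code from this file):
      * a device that polls the queue in its handler can suppress further
      * guest notifications while draining, then must re-check for buffers
      * that raced with re-enabling, e.g.
      *
      *     do {
      *         virtio_queue_set_notification(vq, 0);
      *         ... pop and process elements until the queue is empty ...
      *         virtio_queue_set_notification(vq, 1);
      *     } while (!virtio_queue_empty(vq));
      */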
 449
 450int virtio_queue_ready(VirtQueue *vq)
 451{
 452    return vq->vring.avail != 0;
 453}
 454
 455static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 456                                         uint16_t *flags,
 457                                         MemoryRegionCache *cache,
 458                                         int i)
 459{
 460    address_space_read_cached(cache,
 461                              i * sizeof(VRingPackedDesc) +
 462                              offsetof(VRingPackedDesc, flags),
 463                              flags, sizeof(*flags));
 464    virtio_tswap16s(vdev, flags);
 465}
 466
 467static void vring_packed_desc_read(VirtIODevice *vdev,
 468                                   VRingPackedDesc *desc,
 469                                   MemoryRegionCache *cache,
 470                                   int i, bool strict_order)
 471{
 472    hwaddr off = i * sizeof(VRingPackedDesc);
 473
 474    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
 475
 476    if (strict_order) {
  477        /* Make sure flags is read before the rest of the fields. */
 478        smp_rmb();
 479    }
 480
 481    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
 482                              &desc->addr, sizeof(desc->addr));
 483    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
 484                              &desc->id, sizeof(desc->id));
 485    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
 486                              &desc->len, sizeof(desc->len));
 487    virtio_tswap64s(vdev, &desc->addr);
 488    virtio_tswap16s(vdev, &desc->id);
 489    virtio_tswap32s(vdev, &desc->len);
 490}
 491
 492static void vring_packed_desc_write_data(VirtIODevice *vdev,
 493                                         VRingPackedDesc *desc,
 494                                         MemoryRegionCache *cache,
 495                                         int i)
 496{
 497    hwaddr off_id = i * sizeof(VRingPackedDesc) +
 498                    offsetof(VRingPackedDesc, id);
 499    hwaddr off_len = i * sizeof(VRingPackedDesc) +
 500                    offsetof(VRingPackedDesc, len);
 501
 502    virtio_tswap32s(vdev, &desc->len);
 503    virtio_tswap16s(vdev, &desc->id);
 504    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
 505    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
 506    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
 507    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
 508}
 509
 510static void vring_packed_desc_write_flags(VirtIODevice *vdev,
 511                                          VRingPackedDesc *desc,
 512                                          MemoryRegionCache *cache,
 513                                          int i)
 514{
 515    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 516
 517    virtio_tswap16s(vdev, &desc->flags);
 518    address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
 519    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
 520}
 521
 522static void vring_packed_desc_write(VirtIODevice *vdev,
 523                                    VRingPackedDesc *desc,
 524                                    MemoryRegionCache *cache,
 525                                    int i, bool strict_order)
 526{
 527    vring_packed_desc_write_data(vdev, desc, cache, i);
 528    if (strict_order) {
  529        /* Make sure data is written before flags. */
 530        smp_wmb();
 531    }
 532    vring_packed_desc_write_flags(vdev, desc, cache, i);
 533}
 534
 535static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
 536{
 537    bool avail, used;
 538
 539    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
 540    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
 541    return (avail != used) && (avail == wrap_counter);
 542}
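
     /*
      * Packed-ring note (VIRTIO 1.1): both sides maintain a wrap counter
      * that toggles each time they wrap around the descriptor ring.  The
      * driver marks a descriptor available by writing its current wrap
      * counter to the AVAIL bit and the inverse to the USED bit, so the
      * device treats a descriptor as available when AVAIL != USED and
      * AVAIL matches its own last_avail_wrap_counter, which is the test
      * above.  For example, on the first pass both counters are 1, so
      * AVAIL=1/USED=0 means "available"; after one wrap the same meaning
      * is carried by AVAIL=0/USED=1.
      */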
 543
 544/* Fetch avail_idx from VQ memory only when we really need to know if
  545 * the guest has added some buffers.
 546 * Called within rcu_read_lock().  */
 547static int virtio_queue_empty_rcu(VirtQueue *vq)
 548{
 549    if (unlikely(vq->vdev->broken)) {
 550        return 1;
 551    }
 552
 553    if (unlikely(!vq->vring.avail)) {
 554        return 1;
 555    }
 556
 557    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 558        return 0;
 559    }
 560
 561    return vring_avail_idx(vq) == vq->last_avail_idx;
 562}
 563
 564static int virtio_queue_split_empty(VirtQueue *vq)
 565{
 566    bool empty;
 567
 568    if (unlikely(vq->vdev->broken)) {
 569        return 1;
 570    }
 571
 572    if (unlikely(!vq->vring.avail)) {
 573        return 1;
 574    }
 575
 576    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 577        return 0;
 578    }
 579
 580    RCU_READ_LOCK_GUARD();
 581    empty = vring_avail_idx(vq) == vq->last_avail_idx;
 582    return empty;
 583}
 584
 585static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
 586{
 587    struct VRingPackedDesc desc;
 588    VRingMemoryRegionCaches *cache;
 589
 590    if (unlikely(!vq->vring.desc)) {
 591        return 1;
 592    }
 593
 594    cache = vring_get_region_caches(vq);
 595    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
 596                                 vq->last_avail_idx);
 597
 598    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
 599}
 600
 601static int virtio_queue_packed_empty(VirtQueue *vq)
 602{
 603    RCU_READ_LOCK_GUARD();
 604    return virtio_queue_packed_empty_rcu(vq);
 605}
 606
 607int virtio_queue_empty(VirtQueue *vq)
 608{
 609    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 610        return virtio_queue_packed_empty(vq);
 611    } else {
 612        return virtio_queue_split_empty(vq);
 613    }
 614}
 615
 616static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
 617                               unsigned int len)
 618{
 619    AddressSpace *dma_as = vq->vdev->dma_as;
 620    unsigned int offset;
 621    int i;
 622
 623    offset = 0;
 624    for (i = 0; i < elem->in_num; i++) {
 625        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 626
 627        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
 628                         elem->in_sg[i].iov_len,
 629                         DMA_DIRECTION_FROM_DEVICE, size);
 630
 631        offset += size;
 632    }
 633
 634    for (i = 0; i < elem->out_num; i++)
 635        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
 636                         elem->out_sg[i].iov_len,
 637                         DMA_DIRECTION_TO_DEVICE,
 638                         elem->out_sg[i].iov_len);
 639}
 640
 641/* virtqueue_detach_element:
 642 * @vq: The #VirtQueue
 643 * @elem: The #VirtQueueElement
 644 * @len: number of bytes written
 645 *
 646 * Detach the element from the virtqueue.  This function is suitable for device
 647 * reset or other situations where a #VirtQueueElement is simply freed and will
 648 * not be pushed or discarded.
 649 */
 650void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
 651                              unsigned int len)
 652{
 653    vq->inuse -= elem->ndescs;
 654    virtqueue_unmap_sg(vq, elem, len);
 655}
 656
 657static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
 658{
 659    vq->last_avail_idx -= num;
 660}
 661
 662static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
 663{
 664    if (vq->last_avail_idx < num) {
 665        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
 666        vq->last_avail_wrap_counter ^= 1;
 667    } else {
 668        vq->last_avail_idx -= num;
 669    }
 670}
 671
 672/* virtqueue_unpop:
 673 * @vq: The #VirtQueue
 674 * @elem: The #VirtQueueElement
 675 * @len: number of bytes written
 676 *
 677 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 678 * call to virtqueue_pop() will refetch the element.
 679 */
 680void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
 681                     unsigned int len)
 682{
 683
 684    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 685        virtqueue_packed_rewind(vq, 1);
 686    } else {
 687        virtqueue_split_rewind(vq, 1);
 688    }
 689
 690    virtqueue_detach_element(vq, elem, len);
 691}
 692
 693/* virtqueue_rewind:
 694 * @vq: The #VirtQueue
 695 * @num: Number of elements to push back
 696 *
 697 * Pretend that elements weren't popped from the virtqueue.  The next
 698 * virtqueue_pop() will refetch the oldest element.
 699 *
 700 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 701 *
 702 * Returns: true on success, false if @num is greater than the number of in use
 703 * elements.
 704 */
 705bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 706{
 707    if (num > vq->inuse) {
 708        return false;
 709    }
 710
 711    vq->inuse -= num;
 712    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 713        virtqueue_packed_rewind(vq, num);
 714    } else {
 715        virtqueue_split_rewind(vq, num);
 716    }
 717    return true;
 718}
 719
 720static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 721                    unsigned int len, unsigned int idx)
 722{
 723    VRingUsedElem uelem;
 724
 725    if (unlikely(!vq->vring.used)) {
 726        return;
 727    }
 728
 729    idx = (idx + vq->used_idx) % vq->vring.num;
 730
 731    uelem.id = elem->index;
 732    uelem.len = len;
 733    vring_used_write(vq, &uelem, idx);
 734}
 735
 736static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
 737                                  unsigned int len, unsigned int idx)
 738{
 739    vq->used_elems[idx].index = elem->index;
 740    vq->used_elems[idx].len = len;
 741    vq->used_elems[idx].ndescs = elem->ndescs;
 742}
 743
 744static void virtqueue_packed_fill_desc(VirtQueue *vq,
 745                                       const VirtQueueElement *elem,
 746                                       unsigned int idx,
 747                                       bool strict_order)
 748{
 749    uint16_t head;
 750    VRingMemoryRegionCaches *caches;
 751    VRingPackedDesc desc = {
 752        .id = elem->index,
 753        .len = elem->len,
 754    };
 755    bool wrap_counter = vq->used_wrap_counter;
 756
 757    if (unlikely(!vq->vring.desc)) {
 758        return;
 759    }
 760
 761    head = vq->used_idx + idx;
 762    if (head >= vq->vring.num) {
 763        head -= vq->vring.num;
 764        wrap_counter ^= 1;
 765    }
 766    if (wrap_counter) {
 767        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
 768        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
 769    } else {
 770        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
 771        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
 772    }
 773
 774    caches = vring_get_region_caches(vq);
 775    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
 776}
 777
 778/* Called within rcu_read_lock().  */
 779void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 780                    unsigned int len, unsigned int idx)
 781{
 782    trace_virtqueue_fill(vq, elem, len, idx);
 783
 784    virtqueue_unmap_sg(vq, elem, len);
 785
 786    if (unlikely(vq->vdev->broken)) {
 787        return;
 788    }
 789
 790    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 791        virtqueue_packed_fill(vq, elem, len, idx);
 792    } else {
 793        virtqueue_split_fill(vq, elem, len, idx);
 794    }
 795}
 796
 797/* Called within rcu_read_lock().  */
 798static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
 799{
 800    uint16_t old, new;
 801
 802    if (unlikely(!vq->vring.used)) {
 803        return;
 804    }
 805
 806    /* Make sure buffer is written before we update index. */
 807    smp_wmb();
 808    trace_virtqueue_flush(vq, count);
 809    old = vq->used_idx;
 810    new = old + count;
 811    vring_used_idx_set(vq, new);
 812    vq->inuse -= count;
 813    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
 814        vq->signalled_used_valid = false;
 815}
 816
 817static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
 818{
 819    unsigned int i, ndescs = 0;
 820
 821    if (unlikely(!vq->vring.desc)) {
 822        return;
 823    }
 824
 825    for (i = 1; i < count; i++) {
 826        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
 827        ndescs += vq->used_elems[i].ndescs;
 828    }
 829    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
 830    ndescs += vq->used_elems[0].ndescs;
 831
 832    vq->inuse -= ndescs;
 833    vq->used_idx += ndescs;
 834    if (vq->used_idx >= vq->vring.num) {
 835        vq->used_idx -= vq->vring.num;
 836        vq->used_wrap_counter ^= 1;
 837    }
 838}
 839
 840void virtqueue_flush(VirtQueue *vq, unsigned int count)
 841{
 842    if (unlikely(vq->vdev->broken)) {
 843        vq->inuse -= count;
 844        return;
 845    }
 846
 847    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 848        virtqueue_packed_flush(vq, count);
 849    } else {
 850        virtqueue_split_flush(vq, count);
 851    }
 852}
 853
 854void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 855                    unsigned int len)
 856{
 857    RCU_READ_LOCK_GUARD();
 858    virtqueue_fill(vq, elem, len, 0);
 859    virtqueue_flush(vq, 1);
 860}
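
     /*
      * A minimal device-side usage sketch (illustrative only; names such as
      * process_request() are hypothetical, not part of this file):
      *
      *     VirtQueueElement *elem;
      *
      *     while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
      *         size_t len = process_request(elem);   // hypothetical helper
      *         virtqueue_push(vq, elem, len);        // fill + flush one
      *         g_free(elem);
      *     }
      *     virtio_notify(vdev, vq);
      *
      * Devices that complete many requests at once can instead call
      * virtqueue_fill() per element and a single virtqueue_flush(vq, n)
      * under rcu_read_lock(), which is what virtqueue_push() does for the
      * n == 1 case.
      */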
 861
 862/* Called within rcu_read_lock().  */
 863static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 864{
 865    uint16_t num_heads = vring_avail_idx(vq) - idx;
 866
 867    /* Check it isn't doing very strange things with descriptor numbers. */
 868    if (num_heads > vq->vring.num) {
 869        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
 870                     idx, vq->shadow_avail_idx);
 871        return -EINVAL;
 872    }
 873    /* On success, callers read a descriptor at vq->last_avail_idx.
 874     * Make sure descriptor read does not bypass avail index read. */
 875    if (num_heads) {
 876        smp_rmb();
 877    }
 878
 879    return num_heads;
 880}
 881
 882/* Called within rcu_read_lock().  */
 883static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
 884                               unsigned int *head)
 885{
 886    /* Grab the next descriptor number they're advertising, and increment
 887     * the index we've seen. */
 888    *head = vring_avail_ring(vq, idx % vq->vring.num);
 889
 890    /* If their number is silly, that's a fatal mistake. */
 891    if (*head >= vq->vring.num) {
 892        virtio_error(vq->vdev, "Guest says index %u is available", *head);
 893        return false;
 894    }
 895
 896    return true;
 897}
 898
 899enum {
 900    VIRTQUEUE_READ_DESC_ERROR = -1,
 901    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
 902    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
 903};
 904
 905static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
 906                                          MemoryRegionCache *desc_cache,
 907                                          unsigned int max, unsigned int *next)
 908{
 909    /* If this descriptor says it doesn't chain, we're done. */
 910    if (!(desc->flags & VRING_DESC_F_NEXT)) {
 911        return VIRTQUEUE_READ_DESC_DONE;
 912    }
 913
  914    /* Check they're not leading us off the end of the descriptors. */
 915    *next = desc->next;
 916    /* Make sure compiler knows to grab that: we don't want it changing! */
 917    smp_wmb();
 918
 919    if (*next >= max) {
 920        virtio_error(vdev, "Desc next is %u", *next);
 921        return VIRTQUEUE_READ_DESC_ERROR;
 922    }
 923
 924    vring_split_desc_read(vdev, desc, desc_cache, *next);
 925    return VIRTQUEUE_READ_DESC_MORE;
 926}
 927
 928static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
 929                            unsigned int *in_bytes, unsigned int *out_bytes,
 930                            unsigned max_in_bytes, unsigned max_out_bytes)
 931{
 932    VirtIODevice *vdev = vq->vdev;
 933    unsigned int max, idx;
 934    unsigned int total_bufs, in_total, out_total;
 935    VRingMemoryRegionCaches *caches;
 936    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
 937    int64_t len = 0;
 938    int rc;
 939
 940    RCU_READ_LOCK_GUARD();
 941
 942    idx = vq->last_avail_idx;
 943    total_bufs = in_total = out_total = 0;
 944
 945    max = vq->vring.num;
 946    caches = vring_get_region_caches(vq);
 947    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
 948        MemoryRegionCache *desc_cache = &caches->desc;
 949        unsigned int num_bufs;
 950        VRingDesc desc;
 951        unsigned int i;
 952
 953        num_bufs = total_bufs;
 954
 955        if (!virtqueue_get_head(vq, idx++, &i)) {
 956            goto err;
 957        }
 958
 959        vring_split_desc_read(vdev, &desc, desc_cache, i);
 960
 961        if (desc.flags & VRING_DESC_F_INDIRECT) {
 962            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
 963                virtio_error(vdev, "Invalid size for indirect buffer table");
 964                goto err;
 965            }
 966
 967            /* If we've got too many, that implies a descriptor loop. */
 968            if (num_bufs >= max) {
 969                virtio_error(vdev, "Looped descriptor");
 970                goto err;
 971            }
 972
 973            /* loop over the indirect descriptor table */
 974            len = address_space_cache_init(&indirect_desc_cache,
 975                                           vdev->dma_as,
 976                                           desc.addr, desc.len, false);
 977            desc_cache = &indirect_desc_cache;
 978            if (len < desc.len) {
 979                virtio_error(vdev, "Cannot map indirect buffer");
 980                goto err;
 981            }
 982
 983            max = desc.len / sizeof(VRingDesc);
 984            num_bufs = i = 0;
 985            vring_split_desc_read(vdev, &desc, desc_cache, i);
 986        }
 987
 988        do {
 989            /* If we've got too many, that implies a descriptor loop. */
 990            if (++num_bufs > max) {
 991                virtio_error(vdev, "Looped descriptor");
 992                goto err;
 993            }
 994
 995            if (desc.flags & VRING_DESC_F_WRITE) {
 996                in_total += desc.len;
 997            } else {
 998                out_total += desc.len;
 999            }
1000            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1001                goto done;
1002            }
1003
1004            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1005        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1006
1007        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1008            goto err;
1009        }
1010
1011        if (desc_cache == &indirect_desc_cache) {
1012            address_space_cache_destroy(&indirect_desc_cache);
1013            total_bufs++;
1014        } else {
1015            total_bufs = num_bufs;
1016        }
1017    }
1018
1019    if (rc < 0) {
1020        goto err;
1021    }
1022
1023done:
1024    address_space_cache_destroy(&indirect_desc_cache);
1025    if (in_bytes) {
1026        *in_bytes = in_total;
1027    }
1028    if (out_bytes) {
1029        *out_bytes = out_total;
1030    }
1031    return;
1032
1033err:
1034    in_total = out_total = 0;
1035    goto done;
1036}
1037
1038static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1039                                           VRingPackedDesc *desc,
1040                                           MemoryRegionCache
1041                                           *desc_cache,
1042                                           unsigned int max,
1043                                           unsigned int *next,
1044                                           bool indirect)
1045{
1046    /* If this descriptor says it doesn't chain, we're done. */
1047    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1048        return VIRTQUEUE_READ_DESC_DONE;
1049    }
1050
1051    ++*next;
1052    if (*next == max) {
1053        if (indirect) {
1054            return VIRTQUEUE_READ_DESC_DONE;
1055        } else {
1056            (*next) -= vq->vring.num;
1057        }
1058    }
1059
1060    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1061    return VIRTQUEUE_READ_DESC_MORE;
1062}
1063
1064static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1065                                             unsigned int *in_bytes,
1066                                             unsigned int *out_bytes,
1067                                             unsigned max_in_bytes,
1068                                             unsigned max_out_bytes)
1069{
1070    VirtIODevice *vdev = vq->vdev;
1071    unsigned int max, idx;
1072    unsigned int total_bufs, in_total, out_total;
1073    MemoryRegionCache *desc_cache;
1074    VRingMemoryRegionCaches *caches;
1075    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1076    int64_t len = 0;
1077    VRingPackedDesc desc;
1078    bool wrap_counter;
1079
1080    RCU_READ_LOCK_GUARD();
1081    idx = vq->last_avail_idx;
1082    wrap_counter = vq->last_avail_wrap_counter;
1083    total_bufs = in_total = out_total = 0;
1084
1085    max = vq->vring.num;
1086    caches = vring_get_region_caches(vq);
1087
1088    for (;;) {
1089        unsigned int num_bufs = total_bufs;
1090        unsigned int i = idx;
1091        int rc;
1092
1093        desc_cache = &caches->desc;
1094        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1095        if (!is_desc_avail(desc.flags, wrap_counter)) {
1096            break;
1097        }
1098
1099        if (desc.flags & VRING_DESC_F_INDIRECT) {
1100            if (desc.len % sizeof(VRingPackedDesc)) {
1101                virtio_error(vdev, "Invalid size for indirect buffer table");
1102                goto err;
1103            }
1104
1105            /* If we've got too many, that implies a descriptor loop. */
1106            if (num_bufs >= max) {
1107                virtio_error(vdev, "Looped descriptor");
1108                goto err;
1109            }
1110
1111            /* loop over the indirect descriptor table */
1112            len = address_space_cache_init(&indirect_desc_cache,
1113                                           vdev->dma_as,
1114                                           desc.addr, desc.len, false);
1115            desc_cache = &indirect_desc_cache;
1116            if (len < desc.len) {
1117                virtio_error(vdev, "Cannot map indirect buffer");
1118                goto err;
1119            }
1120
1121            max = desc.len / sizeof(VRingPackedDesc);
1122            num_bufs = i = 0;
1123            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1124        }
1125
1126        do {
1127            /* If we've got too many, that implies a descriptor loop. */
1128            if (++num_bufs > max) {
1129                virtio_error(vdev, "Looped descriptor");
1130                goto err;
1131            }
1132
1133            if (desc.flags & VRING_DESC_F_WRITE) {
1134                in_total += desc.len;
1135            } else {
1136                out_total += desc.len;
1137            }
1138            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1139                goto done;
1140            }
1141
1142            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1143                                                 &i, desc_cache ==
1144                                                 &indirect_desc_cache);
1145        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1146
1147        if (desc_cache == &indirect_desc_cache) {
1148            address_space_cache_destroy(&indirect_desc_cache);
1149            total_bufs++;
1150            idx++;
1151        } else {
1152            idx += num_bufs - total_bufs;
1153            total_bufs = num_bufs;
1154        }
1155
1156        if (idx >= vq->vring.num) {
1157            idx -= vq->vring.num;
1158            wrap_counter ^= 1;
1159        }
1160    }
1161
1162    /* Record the index and wrap counter for a kick we want */
1163    vq->shadow_avail_idx = idx;
1164    vq->shadow_avail_wrap_counter = wrap_counter;
1165done:
1166    address_space_cache_destroy(&indirect_desc_cache);
1167    if (in_bytes) {
1168        *in_bytes = in_total;
1169    }
1170    if (out_bytes) {
1171        *out_bytes = out_total;
1172    }
1173    return;
1174
1175err:
1176    in_total = out_total = 0;
1177    goto done;
1178}
1179
1180void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1181                               unsigned int *out_bytes,
1182                               unsigned max_in_bytes, unsigned max_out_bytes)
1183{
1184    uint16_t desc_size;
1185    VRingMemoryRegionCaches *caches;
1186
1187    if (unlikely(!vq->vring.desc)) {
1188        goto err;
1189    }
1190
1191    caches = vring_get_region_caches(vq);
1192    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1193                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1194    if (caches->desc.len < vq->vring.num * desc_size) {
1195        virtio_error(vq->vdev, "Cannot map descriptor ring");
1196        goto err;
1197    }
1198
1199    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1200        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1201                                         max_in_bytes, max_out_bytes);
1202    } else {
1203        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1204                                        max_in_bytes, max_out_bytes);
1205    }
1206
1207    return;
1208err:
1209    if (in_bytes) {
1210        *in_bytes = 0;
1211    }
1212    if (out_bytes) {
1213        *out_bytes = 0;
1214    }
1215}
1216
1217int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1218                          unsigned int out_bytes)
1219{
1220    unsigned int in_total, out_total;
1221
1222    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1223    return in_bytes <= in_total && out_bytes <= out_total;
1224}
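
     /*
      * For instance (a hypothetical check, not from this file), a device
      * that reads a fixed-size request header from the guest and writes
      * back a one-byte status could test whether the queue currently
      * offers enough buffer space before popping:
      *
      *     // in_bytes = 1 (device-writable), out_bytes = header size
      *     if (!virtqueue_avail_bytes(vq, 1, sizeof(struct req_hdr))) {
      *         ... wait for the guest to add more buffers ...
      *     }
      *
      * struct req_hdr is an assumed, device-specific type.
      */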
1225
1226static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1227                               hwaddr *addr, struct iovec *iov,
1228                               unsigned int max_num_sg, bool is_write,
1229                               hwaddr pa, size_t sz)
1230{
1231    bool ok = false;
1232    unsigned num_sg = *p_num_sg;
1233    assert(num_sg <= max_num_sg);
1234
1235    if (!sz) {
1236        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1237        goto out;
1238    }
1239
1240    while (sz) {
1241        hwaddr len = sz;
1242
1243        if (num_sg == max_num_sg) {
1244            virtio_error(vdev, "virtio: too many write descriptors in "
1245                               "indirect table");
1246            goto out;
1247        }
1248
1249        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1250                                              is_write ?
1251                                              DMA_DIRECTION_FROM_DEVICE :
1252                                              DMA_DIRECTION_TO_DEVICE);
1253        if (!iov[num_sg].iov_base) {
1254            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1255            goto out;
1256        }
1257
1258        iov[num_sg].iov_len = len;
1259        addr[num_sg] = pa;
1260
1261        sz -= len;
1262        pa += len;
1263        num_sg++;
1264    }
1265    ok = true;
1266
1267out:
1268    *p_num_sg = num_sg;
1269    return ok;
1270}
1271
1272/* Only used by error code paths before we have a VirtQueueElement (therefore
1273 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1274 * yet.
1275 */
1276static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1277                                    struct iovec *iov)
1278{
1279    unsigned int i;
1280
1281    for (i = 0; i < out_num + in_num; i++) {
1282        int is_write = i >= out_num;
1283
1284        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1285        iov++;
1286    }
1287}
1288
1289static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1290                                hwaddr *addr, unsigned int num_sg,
1291                                int is_write)
1292{
1293    unsigned int i;
1294    hwaddr len;
1295
1296    for (i = 0; i < num_sg; i++) {
1297        len = sg[i].iov_len;
1298        sg[i].iov_base = dma_memory_map(vdev->dma_as,
1299                                        addr[i], &len, is_write ?
1300                                        DMA_DIRECTION_FROM_DEVICE :
1301                                        DMA_DIRECTION_TO_DEVICE);
1302        if (!sg[i].iov_base) {
1303            error_report("virtio: error trying to map MMIO memory");
1304            exit(1);
1305        }
1306        if (len != sg[i].iov_len) {
1307            error_report("virtio: unexpected memory split");
1308            exit(1);
1309        }
1310    }
1311}
1312
1313void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1314{
1315    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, 1);
1316    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num, 0);
1317}
1318
1319static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1320{
1321    VirtQueueElement *elem;
1322    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1323    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1324    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1325    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1326    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1327    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1328
1329    assert(sz >= sizeof(VirtQueueElement));
1330    elem = g_malloc(out_sg_end);
1331    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1332    elem->out_num = out_num;
1333    elem->in_num = in_num;
1334    elem->in_addr = (void *)elem + in_addr_ofs;
1335    elem->out_addr = (void *)elem + out_addr_ofs;
1336    elem->in_sg = (void *)elem + in_sg_ofs;
1337    elem->out_sg = (void *)elem + out_sg_ofs;
1338    return elem;
1339}
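
     /*
      * The element and its four variable-sized arrays live in a single
      * allocation laid out as: [element header of sz bytes][in_addr[]]
      * [out_addr[]][in_sg[]][out_sg[]], with the address and iovec arrays
      * aligned via QEMU_ALIGN_UP as computed above.  Callers that embed
      * VirtQueueElement at the start of a larger device-specific struct
      * pass that struct's size as sz, which is why sz may exceed
      * sizeof(VirtQueueElement).
      */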
1340
1341static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1342{
1343    unsigned int i, head, max;
1344    VRingMemoryRegionCaches *caches;
1345    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1346    MemoryRegionCache *desc_cache;
1347    int64_t len;
1348    VirtIODevice *vdev = vq->vdev;
1349    VirtQueueElement *elem = NULL;
1350    unsigned out_num, in_num, elem_entries;
1351    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1352    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1353    VRingDesc desc;
1354    int rc;
1355
1356    RCU_READ_LOCK_GUARD();
1357    if (virtio_queue_empty_rcu(vq)) {
1358        goto done;
1359    }
1360    /* Needed after virtio_queue_empty(), see comment in
1361     * virtqueue_num_heads(). */
1362    smp_rmb();
1363
 1364    /* When we start there are neither input nor output buffers. */
1365    out_num = in_num = elem_entries = 0;
1366
1367    max = vq->vring.num;
1368
1369    if (vq->inuse >= vq->vring.num) {
1370        virtio_error(vdev, "Virtqueue size exceeded");
1371        goto done;
1372    }
1373
1374    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1375        goto done;
1376    }
1377
1378    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1379        vring_set_avail_event(vq, vq->last_avail_idx);
1380    }
1381
1382    i = head;
1383
1384    caches = vring_get_region_caches(vq);
1385    if (caches->desc.len < max * sizeof(VRingDesc)) {
1386        virtio_error(vdev, "Cannot map descriptor ring");
1387        goto done;
1388    }
1389
1390    desc_cache = &caches->desc;
1391    vring_split_desc_read(vdev, &desc, desc_cache, i);
1392    if (desc.flags & VRING_DESC_F_INDIRECT) {
1393        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1394            virtio_error(vdev, "Invalid size for indirect buffer table");
1395            goto done;
1396        }
1397
1398        /* loop over the indirect descriptor table */
1399        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1400                                       desc.addr, desc.len, false);
1401        desc_cache = &indirect_desc_cache;
1402        if (len < desc.len) {
1403            virtio_error(vdev, "Cannot map indirect buffer");
1404            goto done;
1405        }
1406
1407        max = desc.len / sizeof(VRingDesc);
1408        i = 0;
1409        vring_split_desc_read(vdev, &desc, desc_cache, i);
1410    }
1411
1412    /* Collect all the descriptors */
1413    do {
1414        bool map_ok;
1415
1416        if (desc.flags & VRING_DESC_F_WRITE) {
1417            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1418                                        iov + out_num,
1419                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1420                                        desc.addr, desc.len);
1421        } else {
1422            if (in_num) {
1423                virtio_error(vdev, "Incorrect order for descriptors");
1424                goto err_undo_map;
1425            }
1426            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1427                                        VIRTQUEUE_MAX_SIZE, false,
1428                                        desc.addr, desc.len);
1429        }
1430        if (!map_ok) {
1431            goto err_undo_map;
1432        }
1433
1434        /* If we've got too many, that implies a descriptor loop. */
1435        if (++elem_entries > max) {
1436            virtio_error(vdev, "Looped descriptor");
1437            goto err_undo_map;
1438        }
1439
1440        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1441    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1442
1443    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1444        goto err_undo_map;
1445    }
1446
1447    /* Now copy what we have collected and mapped */
1448    elem = virtqueue_alloc_element(sz, out_num, in_num);
1449    elem->index = head;
1450    elem->ndescs = 1;
1451    for (i = 0; i < out_num; i++) {
1452        elem->out_addr[i] = addr[i];
1453        elem->out_sg[i] = iov[i];
1454    }
1455    for (i = 0; i < in_num; i++) {
1456        elem->in_addr[i] = addr[out_num + i];
1457        elem->in_sg[i] = iov[out_num + i];
1458    }
1459
1460    vq->inuse++;
1461
1462    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1463done:
1464    address_space_cache_destroy(&indirect_desc_cache);
1465
1466    return elem;
1467
1468err_undo_map:
1469    virtqueue_undo_map_desc(out_num, in_num, iov);
1470    goto done;
1471}
1472
1473static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1474{
1475    unsigned int i, max;
1476    VRingMemoryRegionCaches *caches;
1477    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1478    MemoryRegionCache *desc_cache;
1479    int64_t len;
1480    VirtIODevice *vdev = vq->vdev;
1481    VirtQueueElement *elem = NULL;
1482    unsigned out_num, in_num, elem_entries;
1483    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1484    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1485    VRingPackedDesc desc;
1486    uint16_t id;
1487    int rc;
1488
1489    RCU_READ_LOCK_GUARD();
1490    if (virtio_queue_packed_empty_rcu(vq)) {
1491        goto done;
1492    }
1493
 1494    /* When we start there are neither input nor output buffers. */
1495    out_num = in_num = elem_entries = 0;
1496
1497    max = vq->vring.num;
1498
1499    if (vq->inuse >= vq->vring.num) {
1500        virtio_error(vdev, "Virtqueue size exceeded");
1501        goto done;
1502    }
1503
1504    i = vq->last_avail_idx;
1505
1506    caches = vring_get_region_caches(vq);
1507    if (caches->desc.len < max * sizeof(VRingDesc)) {
1508        virtio_error(vdev, "Cannot map descriptor ring");
1509        goto done;
1510    }
1511
1512    desc_cache = &caches->desc;
1513    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1514    id = desc.id;
1515    if (desc.flags & VRING_DESC_F_INDIRECT) {
1516        if (desc.len % sizeof(VRingPackedDesc)) {
1517            virtio_error(vdev, "Invalid size for indirect buffer table");
1518            goto done;
1519        }
1520
1521        /* loop over the indirect descriptor table */
1522        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1523                                       desc.addr, desc.len, false);
1524        desc_cache = &indirect_desc_cache;
1525        if (len < desc.len) {
1526            virtio_error(vdev, "Cannot map indirect buffer");
1527            goto done;
1528        }
1529
1530        max = desc.len / sizeof(VRingPackedDesc);
1531        i = 0;
1532        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1533    }
1534
1535    /* Collect all the descriptors */
1536    do {
1537        bool map_ok;
1538
1539        if (desc.flags & VRING_DESC_F_WRITE) {
1540            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1541                                        iov + out_num,
1542                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1543                                        desc.addr, desc.len);
1544        } else {
1545            if (in_num) {
1546                virtio_error(vdev, "Incorrect order for descriptors");
1547                goto err_undo_map;
1548            }
1549            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1550                                        VIRTQUEUE_MAX_SIZE, false,
1551                                        desc.addr, desc.len);
1552        }
1553        if (!map_ok) {
1554            goto err_undo_map;
1555        }
1556
1557        /* If we've got too many, that implies a descriptor loop. */
1558        if (++elem_entries > max) {
1559            virtio_error(vdev, "Looped descriptor");
1560            goto err_undo_map;
1561        }
1562
1563        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1564                                             desc_cache ==
1565                                             &indirect_desc_cache);
1566    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1567
1568    /* Now copy what we have collected and mapped */
1569    elem = virtqueue_alloc_element(sz, out_num, in_num);
1570    for (i = 0; i < out_num; i++) {
1571        elem->out_addr[i] = addr[i];
1572        elem->out_sg[i] = iov[i];
1573    }
1574    for (i = 0; i < in_num; i++) {
1575        elem->in_addr[i] = addr[out_num + i];
1576        elem->in_sg[i] = iov[out_num + i];
1577    }
1578
1579    elem->index = id;
1580    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1581    vq->last_avail_idx += elem->ndescs;
1582    vq->inuse += elem->ndescs;
1583
1584    if (vq->last_avail_idx >= vq->vring.num) {
1585        vq->last_avail_idx -= vq->vring.num;
1586        vq->last_avail_wrap_counter ^= 1;
1587    }
1588
1589    vq->shadow_avail_idx = vq->last_avail_idx;
1590    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1591
1592    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1593done:
1594    address_space_cache_destroy(&indirect_desc_cache);
1595
1596    return elem;
1597
1598err_undo_map:
1599    virtqueue_undo_map_desc(out_num, in_num, iov);
1600    goto done;
1601}
1602
1603void *virtqueue_pop(VirtQueue *vq, size_t sz)
1604{
1605    if (unlikely(vq->vdev->broken)) {
1606        return NULL;
1607    }
1608
1609    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1610        return virtqueue_packed_pop(vq, sz);
1611    } else {
1612        return virtqueue_split_pop(vq, sz);
1613    }
1614}
1615
1616static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1617{
1618    VRingMemoryRegionCaches *caches;
1619    MemoryRegionCache *desc_cache;
1620    unsigned int dropped = 0;
1621    VirtQueueElement elem = {};
1622    VirtIODevice *vdev = vq->vdev;
1623    VRingPackedDesc desc;
1624
1625    caches = vring_get_region_caches(vq);
1626    desc_cache = &caches->desc;
1627
1628    virtio_queue_set_notification(vq, 0);
1629
1630    while (vq->inuse < vq->vring.num) {
1631        unsigned int idx = vq->last_avail_idx;
1632        /*
 1633         * works similarly to virtqueue_pop but does not map buffers
1634         * and does not allocate any memory.
1635         */
1636        vring_packed_desc_read(vdev, &desc, desc_cache,
 1637                               vq->last_avail_idx, true);
1638        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1639            break;
1640        }
1641        elem.index = desc.id;
1642        elem.ndescs = 1;
1643        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1644                                               vq->vring.num, &idx, false)) {
1645            ++elem.ndescs;
1646        }
1647        /*
1648         * immediately push the element, nothing to unmap
1649         * as both in_num and out_num are set to 0.
1650         */
1651        virtqueue_push(vq, &elem, 0);
1652        dropped++;
1653        vq->last_avail_idx += elem.ndescs;
1654        if (vq->last_avail_idx >= vq->vring.num) {
1655            vq->last_avail_idx -= vq->vring.num;
1656            vq->last_avail_wrap_counter ^= 1;
1657        }
1658    }
1659
1660    return dropped;
1661}
1662
1663static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1664{
1665    unsigned int dropped = 0;
1666    VirtQueueElement elem = {};
1667    VirtIODevice *vdev = vq->vdev;
1668    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1669
1670    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1671        /* works similarly to virtqueue_pop() but does not map buffers
1672         * and does not allocate any memory. */
1673        smp_rmb();
1674        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1675            break;
1676        }
1677        vq->inuse++;
1678        vq->last_avail_idx++;
1679        if (fEventIdx) {
1680            vring_set_avail_event(vq, vq->last_avail_idx);
1681        }
1682        /* immediately push the element, nothing to unmap
1683         * as both in_num and out_num are set to 0 */
1684        virtqueue_push(vq, &elem, 0);
1685        dropped++;
1686    }
1687
1688    return dropped;
1689}
1690
1691/* virtqueue_drop_all:
1692 * @vq: The #VirtQueue
1693 * Drops all queued buffers and indicates them to the guest
1694 * as if they were done. Useful when buffers cannot be
1695 * processed but must be returned to the guest.
1696 */
1697unsigned int virtqueue_drop_all(VirtQueue *vq)
1698{
1699    struct VirtIODevice *vdev = vq->vdev;
1700
1701    if (unlikely(vdev->broken)) {
1702        return 0;
1703    }
1704
1705    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1706        return virtqueue_packed_drop_all(vq);
1707    } else {
1708        return virtqueue_split_drop_all(vq);
1709    }
1710}
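
/*
 * Illustrative usage sketch, not part of the original source: a device
 * that can no longer service requests might drain its queue and then
 * signal the guest.  "MyDeviceState", "s->vq" and "my_device_is_stuck()"
 * are hypothetical names used only for this example.
 *
 *     MyDeviceState *s;
 *
 *     if (my_device_is_stuck(s)) {
 *         unsigned int n = virtqueue_drop_all(s->vq);
 *
 *         if (n) {
 *             virtio_notify(VIRTIO_DEVICE(s), s->vq);
 *         }
 *     }
 */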
1711
1712/* Reading and writing a structure directly to QEMUFile is *awful*, but
1713 * it is what QEMU has always done by mistake.  We can change it sooner
1714 * or later by bumping the version number of the affected vm states.
1715 * In the meanwhile, since the in-memory layout of VirtQueueElement
1716 * has changed, we need to marshal to and from the layout that was
1717 * used before the change.
1718 */
1719typedef struct VirtQueueElementOld {
1720    unsigned int index;
1721    unsigned int out_num;
1722    unsigned int in_num;
1723    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1724    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1725    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1726    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1727} VirtQueueElementOld;
1728
1729void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1730{
1731    VirtQueueElement *elem;
1732    VirtQueueElementOld data;
1733    int i;
1734
1735    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1736
1737    /* TODO: teach all callers that this can fail, and return failure instead
1738     * of asserting here.
1739     * This is just one thing (there are probably more) that must be
1740     * fixed before we can allow NDEBUG compilation.
1741     */
1742    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1743    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1744
1745    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1746    elem->index = data.index;
1747
1748    for (i = 0; i < elem->in_num; i++) {
1749        elem->in_addr[i] = data.in_addr[i];
1750    }
1751
1752    for (i = 0; i < elem->out_num; i++) {
1753        elem->out_addr[i] = data.out_addr[i];
1754    }
1755
1756    for (i = 0; i < elem->in_num; i++) {
1757        /* Base is overwritten by virtqueue_map.  */
1758        elem->in_sg[i].iov_base = 0;
1759        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1760    }
1761
1762    for (i = 0; i < elem->out_num; i++) {
1763        /* Base is overwritten by virtqueue_map.  */
1764        elem->out_sg[i].iov_base = 0;
1765        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1766    }
1767
1768    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1769        qemu_get_be32s(f, &elem->ndescs);
1770    }
1771
1772    virtqueue_map(vdev, elem);
1773    return elem;
1774}
1775
1776void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1777                                VirtQueueElement *elem)
1778{
1779    VirtQueueElementOld data;
1780    int i;
1781
1782    memset(&data, 0, sizeof(data));
1783    data.index = elem->index;
1784    data.in_num = elem->in_num;
1785    data.out_num = elem->out_num;
1786
1787    for (i = 0; i < elem->in_num; i++) {
1788        data.in_addr[i] = elem->in_addr[i];
1789    }
1790
1791    for (i = 0; i < elem->out_num; i++) {
1792        data.out_addr[i] = elem->out_addr[i];
1793    }
1794
1795    for (i = 0; i < elem->in_num; i++) {
1796        /* Base is overwritten by virtqueue_map when loading.  Do not
1797         * save it, as it would leak the QEMU address space layout.  */
1798        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1799    }
1800
1801    for (i = 0; i < elem->out_num; i++) {
1802        /* Do not save iov_base as above.  */
1803        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1804    }
1805
1806    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1807        qemu_put_be32s(f, &elem->ndescs);
1808    }
1809
1810    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1811}
1812
1813/* virtio device */
1814static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1815{
1816    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1817    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1818
1819    if (unlikely(vdev->broken)) {
1820        return;
1821    }
1822
1823    if (k->notify) {
1824        k->notify(qbus->parent, vector);
1825    }
1826}
1827
1828void virtio_update_irq(VirtIODevice *vdev)
1829{
1830    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1831}
1832
1833static int virtio_validate_features(VirtIODevice *vdev)
1834{
1835    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1836
1837    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1838        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1839        return -EFAULT;
1840    }
1841
1842    if (k->validate_features) {
1843        return k->validate_features(vdev);
1844    } else {
1845        return 0;
1846    }
1847}
1848
1849int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1850{
1851    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1852    trace_virtio_set_status(vdev, val);
1853
1854    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1855        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1856            val & VIRTIO_CONFIG_S_FEATURES_OK) {
1857            int ret = virtio_validate_features(vdev);
1858
1859            if (ret) {
1860                return ret;
1861            }
1862        }
1863    }
1864
1865    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1866        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1867        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1868    }
1869
1870    if (k->set_status) {
1871        k->set_status(vdev, val);
1872    }
1873    vdev->status = val;
1874
1875    return 0;
1876}
1877
1878static enum virtio_device_endian virtio_default_endian(void)
1879{
1880    if (target_words_bigendian()) {
1881        return VIRTIO_DEVICE_ENDIAN_BIG;
1882    } else {
1883        return VIRTIO_DEVICE_ENDIAN_LITTLE;
1884    }
1885}
1886
1887static enum virtio_device_endian virtio_current_cpu_endian(void)
1888{
1889    CPUClass *cc = CPU_GET_CLASS(current_cpu);
1890
1891    if (cc->virtio_is_big_endian(current_cpu)) {
1892        return VIRTIO_DEVICE_ENDIAN_BIG;
1893    } else {
1894        return VIRTIO_DEVICE_ENDIAN_LITTLE;
1895    }
1896}
1897
1898void virtio_reset(void *opaque)
1899{
1900    VirtIODevice *vdev = opaque;
1901    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1902    int i;
1903
1904    virtio_set_status(vdev, 0);
1905    if (current_cpu) {
1906        /* Guest initiated reset */
1907        vdev->device_endian = virtio_current_cpu_endian();
1908    } else {
1909        /* System reset */
1910        vdev->device_endian = virtio_default_endian();
1911    }
1912
1913    if (k->reset) {
1914        k->reset(vdev);
1915    }
1916
1917    vdev->start_on_kick = false;
1918    vdev->started = false;
1919    vdev->broken = false;
1920    vdev->guest_features = 0;
1921    vdev->queue_sel = 0;
1922    vdev->status = 0;
1923    atomic_set(&vdev->isr, 0);
1924    vdev->config_vector = VIRTIO_NO_VECTOR;
1925    virtio_notify_vector(vdev, vdev->config_vector);
1926
1927    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1928        vdev->vq[i].vring.desc = 0;
1929        vdev->vq[i].vring.avail = 0;
1930        vdev->vq[i].vring.used = 0;
1931        vdev->vq[i].last_avail_idx = 0;
1932        vdev->vq[i].shadow_avail_idx = 0;
1933        vdev->vq[i].used_idx = 0;
1934        vdev->vq[i].last_avail_wrap_counter = true;
1935        vdev->vq[i].shadow_avail_wrap_counter = true;
1936        vdev->vq[i].used_wrap_counter = true;
1937        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
1938        vdev->vq[i].signalled_used = 0;
1939        vdev->vq[i].signalled_used_valid = false;
1940        vdev->vq[i].notification = true;
1941        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
1942        vdev->vq[i].inuse = 0;
1943        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
1944    }
1945}
1946
1947uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
1948{
1949    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1950    uint8_t val;
1951
1952    if (addr + sizeof(val) > vdev->config_len) {
1953        return (uint32_t)-1;
1954    }
1955
1956    k->get_config(vdev, vdev->config);
1957
1958    val = ldub_p(vdev->config + addr);
1959    return val;
1960}
1961
1962uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
1963{
1964    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1965    uint16_t val;
1966
1967    if (addr + sizeof(val) > vdev->config_len) {
1968        return (uint32_t)-1;
1969    }
1970
1971    k->get_config(vdev, vdev->config);
1972
1973    val = lduw_p(vdev->config + addr);
1974    return val;
1975}
1976
1977uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
1978{
1979    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1980    uint32_t val;
1981
1982    if (addr + sizeof(val) > vdev->config_len) {
1983        return (uint32_t)-1;
1984    }
1985
1986    k->get_config(vdev, vdev->config);
1987
1988    val = ldl_p(vdev->config + addr);
1989    return val;
1990}
1991
1992void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1993{
1994    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1995    uint8_t val = data;
1996
1997    if (addr + sizeof(val) > vdev->config_len) {
1998        return;
1999    }
2000
2001    stb_p(vdev->config + addr, val);
2002
2003    if (k->set_config) {
2004        k->set_config(vdev, vdev->config);
2005    }
2006}
2007
2008void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2009{
2010    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2011    uint16_t val = data;
2012
2013    if (addr + sizeof(val) > vdev->config_len) {
2014        return;
2015    }
2016
2017    stw_p(vdev->config + addr, val);
2018
2019    if (k->set_config) {
2020        k->set_config(vdev, vdev->config);
2021    }
2022}
2023
2024void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2025{
2026    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2027    uint32_t val = data;
2028
2029    if (addr + sizeof(val) > vdev->config_len) {
2030        return;
2031    }
2032
2033    stl_p(vdev->config + addr, val);
2034
2035    if (k->set_config) {
2036        k->set_config(vdev, vdev->config);
2037    }
2038}
2039
2040uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2041{
2042    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2043    uint8_t val;
2044
2045    if (addr + sizeof(val) > vdev->config_len) {
2046        return (uint32_t)-1;
2047    }
2048
2049    k->get_config(vdev, vdev->config);
2050
2051    val = ldub_p(vdev->config + addr);
2052    return val;
2053}
2054
2055uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2056{
2057    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2058    uint16_t val;
2059
2060    if (addr + sizeof(val) > vdev->config_len) {
2061        return (uint32_t)-1;
2062    }
2063
2064    k->get_config(vdev, vdev->config);
2065
2066    val = lduw_le_p(vdev->config + addr);
2067    return val;
2068}
2069
2070uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2071{
2072    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2073    uint32_t val;
2074
2075    if (addr + sizeof(val) > vdev->config_len) {
2076        return (uint32_t)-1;
2077    }
2078
2079    k->get_config(vdev, vdev->config);
2080
2081    val = ldl_le_p(vdev->config + addr);
2082    return val;
2083}
2084
2085void virtio_config_modern_writeb(VirtIODevice *vdev,
2086                                 uint32_t addr, uint32_t data)
2087{
2088    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2089    uint8_t val = data;
2090
2091    if (addr + sizeof(val) > vdev->config_len) {
2092        return;
2093    }
2094
2095    stb_p(vdev->config + addr, val);
2096
2097    if (k->set_config) {
2098        k->set_config(vdev, vdev->config);
2099    }
2100}
2101
2102void virtio_config_modern_writew(VirtIODevice *vdev,
2103                                 uint32_t addr, uint32_t data)
2104{
2105    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2106    uint16_t val = data;
2107
2108    if (addr + sizeof(val) > vdev->config_len) {
2109        return;
2110    }
2111
2112    stw_le_p(vdev->config + addr, val);
2113
2114    if (k->set_config) {
2115        k->set_config(vdev, vdev->config);
2116    }
2117}
2118
2119void virtio_config_modern_writel(VirtIODevice *vdev,
2120                                 uint32_t addr, uint32_t data)
2121{
2122    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2123    uint32_t val = data;
2124
2125    if (addr + sizeof(val) > vdev->config_len) {
2126        return;
2127    }
2128
2129    stl_le_p(vdev->config + addr, val);
2130
2131    if (k->set_config) {
2132        k->set_config(vdev, vdev->config);
2133    }
2134}
2135
2136void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2137{
2138    if (!vdev->vq[n].vring.num) {
2139        return;
2140    }
2141    vdev->vq[n].vring.desc = addr;
2142    virtio_queue_update_rings(vdev, n);
2143}
2144
2145hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2146{
2147    return vdev->vq[n].vring.desc;
2148}
2149
2150void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2151                            hwaddr avail, hwaddr used)
2152{
2153    if (!vdev->vq[n].vring.num) {
2154        return;
2155    }
2156    vdev->vq[n].vring.desc = desc;
2157    vdev->vq[n].vring.avail = avail;
2158    vdev->vq[n].vring.used = used;
2159    virtio_init_region_cache(vdev, n);
2160}
2161
2162void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2163{
2164    /* Don't allow guest to flip queue between existent and
2165     * nonexistent states, or to set it to an invalid size.
2166     */
2167    if (!!num != !!vdev->vq[n].vring.num ||
2168        num > VIRTQUEUE_MAX_SIZE ||
2169        num < 0) {
2170        return;
2171    }
2172    vdev->vq[n].vring.num = num;
2173}
2174
2175VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2176{
2177    return QLIST_FIRST(&vdev->vector_queues[vector]);
2178}
2179
2180VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2181{
2182    return QLIST_NEXT(vq, node);
2183}
2184
2185int virtio_queue_get_num(VirtIODevice *vdev, int n)
2186{
2187    return vdev->vq[n].vring.num;
2188}
2189
2190int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2191{
2192    return vdev->vq[n].vring.num_default;
2193}
2194
2195int virtio_get_num_queues(VirtIODevice *vdev)
2196{
2197    int i;
2198
2199    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2200        if (!virtio_queue_get_num(vdev, i)) {
2201            break;
2202        }
2203    }
2204
2205    return i;
2206}
2207
2208void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2209{
2210    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2211    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2212
2213    /* virtio-1 compliant devices cannot change the alignment */
2214    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2215        error_report("tried to modify queue alignment for virtio-1 device");
2216        return;
2217    }
2218    /* Check that the transport told us it was going to do this
2219     * (so a buggy transport will immediately assert rather than
2220     * silently failing to migrate this state)
2221     */
2222    assert(k->has_variable_vring_alignment);
2223
2224    if (align) {
2225        vdev->vq[n].vring.align = align;
2226        virtio_queue_update_rings(vdev, n);
2227    }
2228}
2229
2230static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2231{
2232    bool ret = false;
2233
2234    if (vq->vring.desc && vq->handle_aio_output) {
2235        VirtIODevice *vdev = vq->vdev;
2236
2237        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2238        ret = vq->handle_aio_output(vdev, vq);
2239
2240        if (unlikely(vdev->start_on_kick)) {
2241            virtio_set_started(vdev, true);
2242        }
2243    }
2244
2245    return ret;
2246}
2247
2248static void virtio_queue_notify_vq(VirtQueue *vq)
2249{
2250    if (vq->vring.desc && vq->handle_output) {
2251        VirtIODevice *vdev = vq->vdev;
2252
2253        if (unlikely(vdev->broken)) {
2254            return;
2255        }
2256
2257        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2258        vq->handle_output(vdev, vq);
2259
2260        if (unlikely(vdev->start_on_kick)) {
2261            virtio_set_started(vdev, true);
2262        }
2263    }
2264}
2265
2266void virtio_queue_notify(VirtIODevice *vdev, int n)
2267{
2268    VirtQueue *vq = &vdev->vq[n];
2269
2270    if (unlikely(!vq->vring.desc || vdev->broken)) {
2271        return;
2272    }
2273
2274    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2275    if (vq->host_notifier_enabled) {
2276        event_notifier_set(&vq->host_notifier);
2277    } else if (vq->handle_output) {
2278        vq->handle_output(vdev, vq);
2279
2280        if (unlikely(vdev->start_on_kick)) {
2281            virtio_set_started(vdev, true);
2282        }
2283    }
2284}
2285
2286uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2287{
2288    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2289        VIRTIO_NO_VECTOR;
2290}
2291
2292void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2293{
2294    VirtQueue *vq = &vdev->vq[n];
2295
2296    if (n < VIRTIO_QUEUE_MAX) {
2297        if (vdev->vector_queues &&
2298            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2299            QLIST_REMOVE(vq, node);
2300        }
2301        vdev->vq[n].vector = vector;
2302        if (vdev->vector_queues &&
2303            vector != VIRTIO_NO_VECTOR) {
2304            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2305        }
2306    }
2307}
2308
2309VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2310                            VirtIOHandleOutput handle_output)
2311{
2312    int i;
2313
2314    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2315        if (vdev->vq[i].vring.num == 0)
2316            break;
2317    }
2318
2319    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2320        abort();
2321
2322    vdev->vq[i].vring.num = queue_size;
2323    vdev->vq[i].vring.num_default = queue_size;
2324    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2325    vdev->vq[i].handle_output = handle_output;
2326    vdev->vq[i].handle_aio_output = NULL;
2327    vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2328                                       queue_size);
2329
2330    return &vdev->vq[i];
2331}
2332
2333void virtio_del_queue(VirtIODevice *vdev, int n)
2334{
2335    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2336        abort();
2337    }
2338
2339    vdev->vq[n].vring.num = 0;
2340    vdev->vq[n].vring.num_default = 0;
2341    vdev->vq[n].handle_output = NULL;
2342    vdev->vq[n].handle_aio_output = NULL;
2343    g_free(vdev->vq[n].used_elems);
2344}
2345
2346static void virtio_set_isr(VirtIODevice *vdev, int value)
2347{
2348    uint8_t old = atomic_read(&vdev->isr);
2349
2350    /* Do not write ISR if it does not change, so that its cacheline remains
2351     * shared in the common case where the guest does not read it.
2352     */
2353    if ((old & value) != value) {
2354        atomic_or(&vdev->isr, value);
2355    }
2356}
2357
2358static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2359{
2360    uint16_t old, new;
2361    bool v;
2362    /* We need to expose used array entries before checking used event. */
2363    smp_mb();
2364    /* Always notify when queue is empty (if the feature was acknowledged) */
2365    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2366        !vq->inuse && virtio_queue_empty(vq)) {
2367        return true;
2368    }
2369
2370    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2371        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2372    }
2373
2374    v = vq->signalled_used_valid;
2375    vq->signalled_used_valid = true;
2376    old = vq->signalled_used;
2377    new = vq->signalled_used = vq->used_idx;
2378    return !v || vring_need_event(vring_get_used_event(vq), new, old);
2379}
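
/*
 * Worked example, illustrative and not in the original source: with
 * VIRTIO_RING_F_EVENT_IDX negotiated, vring_need_event(event, new, old)
 * (from the standard virtio ring headers) reports whether the guest's
 * used_event index falls in the window [old, new) of entries marked used
 * since the last notification.  For instance, with old = 10, new = 15 and
 * used_event = 12 the device should notify; with used_event = 20 it
 * should not, because the guest asked to be woken only once index 20
 * has been used.
 */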
2380
2381static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2382                                    uint16_t off_wrap, uint16_t new,
2383                                    uint16_t old)
2384{
2385    int off = off_wrap & ~(1 << 15);
2386
2387    if (wrap != off_wrap >> 15) {
2388        off -= vq->vring.num;
2389    }
2390
2391    return vring_need_event(off, new, old);
2392}
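
/*
 * Worked example, illustrative and not in the original source: the
 * driver's event suppression area stores a 15-bit descriptor offset with
 * a wrap bit in bit 15 of off_wrap.  Suppose the ring has 256 entries,
 * the device's used wrap counter is 1 and the driver wrote
 * off_wrap = (0 << 15) | 10: the wrap bits differ, so the offset is
 * adjusted to 10 - 256 = -246 before being handed to vring_need_event()
 * together with the old and new used indexes.
 */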
2393
2394static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2395{
2396    VRingPackedDescEvent e;
2397    uint16_t old, new;
2398    bool v;
2399    VRingMemoryRegionCaches *caches;
2400
2401    caches = vring_get_region_caches(vq);
2402    vring_packed_event_read(vdev, &caches->avail, &e);
2403
2404    old = vq->signalled_used;
2405    new = vq->signalled_used = vq->used_idx;
2406    v = vq->signalled_used_valid;
2407    vq->signalled_used_valid = true;
2408
2409    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2410        return false;
2411    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2412        return true;
2413    }
2414
2415    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2416                                         e.off_wrap, new, old);
2417}
2418
2419/* Called within rcu_read_lock().  */
2420static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2421{
2422    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2423        return virtio_packed_should_notify(vdev, vq);
2424    } else {
2425        return virtio_split_should_notify(vdev, vq);
2426    }
2427}
2428
2429void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2430{
2431    WITH_RCU_READ_LOCK_GUARD() {
2432        if (!virtio_should_notify(vdev, vq)) {
2433            return;
2434        }
2435    }
2436
2437    trace_virtio_notify_irqfd(vdev, vq);
2438
2439    /*
2440     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2441     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2442     * incorrectly polling this bit during crashdump and hibernation
2443     * in MSI mode, causing a hang if this bit is never updated.
2444     * Recent releases of Windows do not really shut down, but rather
2445     * log out and hibernate to make the next startup faster.  Hence,
2446     * this manifested as a more serious hang during shutdown in MSI
2447     * mode.  The next driver release, from 2016, fixed this problem, so
2448     * working around it is not a must, but it's easy to do, so let's do
2449     * it here.
2450     *
2451     * Note: it's safe to update ISR from any thread as it was switched
2452     * to an atomic operation.
2453     */
2454    virtio_set_isr(vq->vdev, 0x1);
2455    event_notifier_set(&vq->guest_notifier);
2456}
2457
2458static void virtio_irq(VirtQueue *vq)
2459{
2460    virtio_set_isr(vq->vdev, 0x1);
2461    virtio_notify_vector(vq->vdev, vq->vector);
2462}
2463
2464void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2465{
2466    WITH_RCU_READ_LOCK_GUARD() {
2467        if (!virtio_should_notify(vdev, vq)) {
2468            return;
2469        }
2470    }
2471
2472    trace_virtio_notify(vdev, vq);
2473    virtio_irq(vq);
2474}
2475
2476void virtio_notify_config(VirtIODevice *vdev)
2477{
2478    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2479        return;
2480
2481    virtio_set_isr(vdev, 0x3);
2482    vdev->generation++;
2483    virtio_notify_vector(vdev, vdev->config_vector);
2484}
2485
2486static bool virtio_device_endian_needed(void *opaque)
2487{
2488    VirtIODevice *vdev = opaque;
2489
2490    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2491    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2492        return vdev->device_endian != virtio_default_endian();
2493    }
2494    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2495    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2496}
2497
2498static bool virtio_64bit_features_needed(void *opaque)
2499{
2500    VirtIODevice *vdev = opaque;
2501
2502    return (vdev->host_features >> 32) != 0;
2503}
2504
2505static bool virtio_virtqueue_needed(void *opaque)
2506{
2507    VirtIODevice *vdev = opaque;
2508
2509    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2510}
2511
2512static bool virtio_packed_virtqueue_needed(void *opaque)
2513{
2514    VirtIODevice *vdev = opaque;
2515
2516    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2517}
2518
2519static bool virtio_ringsize_needed(void *opaque)
2520{
2521    VirtIODevice *vdev = opaque;
2522    int i;
2523
2524    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2525        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2526            return true;
2527        }
2528    }
2529    return false;
2530}
2531
2532static bool virtio_extra_state_needed(void *opaque)
2533{
2534    VirtIODevice *vdev = opaque;
2535    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2536    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2537
2538    return k->has_extra_state &&
2539        k->has_extra_state(qbus->parent);
2540}
2541
2542static bool virtio_broken_needed(void *opaque)
2543{
2544    VirtIODevice *vdev = opaque;
2545
2546    return vdev->broken;
2547}
2548
2549static bool virtio_started_needed(void *opaque)
2550{
2551    VirtIODevice *vdev = opaque;
2552
2553    return vdev->started;
2554}
2555
2556static const VMStateDescription vmstate_virtqueue = {
2557    .name = "virtqueue_state",
2558    .version_id = 1,
2559    .minimum_version_id = 1,
2560    .fields = (VMStateField[]) {
2561        VMSTATE_UINT64(vring.avail, struct VirtQueue),
2562        VMSTATE_UINT64(vring.used, struct VirtQueue),
2563        VMSTATE_END_OF_LIST()
2564    }
2565};
2566
2567static const VMStateDescription vmstate_packed_virtqueue = {
2568    .name = "packed_virtqueue_state",
2569    .version_id = 1,
2570    .minimum_version_id = 1,
2571    .fields = (VMStateField[]) {
2572        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2573        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2574        VMSTATE_UINT16(used_idx, struct VirtQueue),
2575        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2576        VMSTATE_UINT32(inuse, struct VirtQueue),
2577        VMSTATE_END_OF_LIST()
2578    }
2579};
2580
2581static const VMStateDescription vmstate_virtio_virtqueues = {
2582    .name = "virtio/virtqueues",
2583    .version_id = 1,
2584    .minimum_version_id = 1,
2585    .needed = &virtio_virtqueue_needed,
2586    .fields = (VMStateField[]) {
2587        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2588                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2589        VMSTATE_END_OF_LIST()
2590    }
2591};
2592
2593static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2594    .name = "virtio/packed_virtqueues",
2595    .version_id = 1,
2596    .minimum_version_id = 1,
2597    .needed = &virtio_packed_virtqueue_needed,
2598    .fields = (VMStateField[]) {
2599        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2600                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2601        VMSTATE_END_OF_LIST()
2602    }
2603};
2604
2605static const VMStateDescription vmstate_ringsize = {
2606    .name = "ringsize_state",
2607    .version_id = 1,
2608    .minimum_version_id = 1,
2609    .fields = (VMStateField[]) {
2610        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2611        VMSTATE_END_OF_LIST()
2612    }
2613};
2614
2615static const VMStateDescription vmstate_virtio_ringsize = {
2616    .name = "virtio/ringsize",
2617    .version_id = 1,
2618    .minimum_version_id = 1,
2619    .needed = &virtio_ringsize_needed,
2620    .fields = (VMStateField[]) {
2621        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2622                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2623        VMSTATE_END_OF_LIST()
2624    }
2625};
2626
2627static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2628                           const VMStateField *field)
2629{
2630    VirtIODevice *vdev = pv;
2631    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2632    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2633
2634    if (!k->load_extra_state) {
2635        return -1;
2636    } else {
2637        return k->load_extra_state(qbus->parent, f);
2638    }
2639}
2640
2641static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2642                           const VMStateField *field, QJSON *vmdesc)
2643{
2644    VirtIODevice *vdev = pv;
2645    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2646    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2647
2648    k->save_extra_state(qbus->parent, f);
2649    return 0;
2650}
2651
2652static const VMStateInfo vmstate_info_extra_state = {
2653    .name = "virtqueue_extra_state",
2654    .get = get_extra_state,
2655    .put = put_extra_state,
2656};
2657
2658static const VMStateDescription vmstate_virtio_extra_state = {
2659    .name = "virtio/extra_state",
2660    .version_id = 1,
2661    .minimum_version_id = 1,
2662    .needed = &virtio_extra_state_needed,
2663    .fields = (VMStateField[]) {
2664        {
2665            .name         = "extra_state",
2666            .version_id   = 0,
2667            .field_exists = NULL,
2668            .size         = 0,
2669            .info         = &vmstate_info_extra_state,
2670            .flags        = VMS_SINGLE,
2671            .offset       = 0,
2672        },
2673        VMSTATE_END_OF_LIST()
2674    }
2675};
2676
2677static const VMStateDescription vmstate_virtio_device_endian = {
2678    .name = "virtio/device_endian",
2679    .version_id = 1,
2680    .minimum_version_id = 1,
2681    .needed = &virtio_device_endian_needed,
2682    .fields = (VMStateField[]) {
2683        VMSTATE_UINT8(device_endian, VirtIODevice),
2684        VMSTATE_END_OF_LIST()
2685    }
2686};
2687
2688static const VMStateDescription vmstate_virtio_64bit_features = {
2689    .name = "virtio/64bit_features",
2690    .version_id = 1,
2691    .minimum_version_id = 1,
2692    .needed = &virtio_64bit_features_needed,
2693    .fields = (VMStateField[]) {
2694        VMSTATE_UINT64(guest_features, VirtIODevice),
2695        VMSTATE_END_OF_LIST()
2696    }
2697};
2698
2699static const VMStateDescription vmstate_virtio_broken = {
2700    .name = "virtio/broken",
2701    .version_id = 1,
2702    .minimum_version_id = 1,
2703    .needed = &virtio_broken_needed,
2704    .fields = (VMStateField[]) {
2705        VMSTATE_BOOL(broken, VirtIODevice),
2706        VMSTATE_END_OF_LIST()
2707    }
2708};
2709
2710static const VMStateDescription vmstate_virtio_started = {
2711    .name = "virtio/started",
2712    .version_id = 1,
2713    .minimum_version_id = 1,
2714    .needed = &virtio_started_needed,
2715    .fields = (VMStateField[]) {
2716        VMSTATE_BOOL(started, VirtIODevice),
2717        VMSTATE_END_OF_LIST()
2718    }
2719};
2720
2721static const VMStateDescription vmstate_virtio = {
2722    .name = "virtio",
2723    .version_id = 1,
2724    .minimum_version_id = 1,
2725    .minimum_version_id_old = 1,
2726    .fields = (VMStateField[]) {
2727        VMSTATE_END_OF_LIST()
2728    },
2729    .subsections = (const VMStateDescription*[]) {
2730        &vmstate_virtio_device_endian,
2731        &vmstate_virtio_64bit_features,
2732        &vmstate_virtio_virtqueues,
2733        &vmstate_virtio_ringsize,
2734        &vmstate_virtio_broken,
2735        &vmstate_virtio_extra_state,
2736        &vmstate_virtio_started,
2737        &vmstate_virtio_packed_virtqueues,
2738        NULL
2739    }
2740};
2741
2742int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2743{
2744    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2745    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2746    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2747    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2748    int i;
2749
2750    if (k->save_config) {
2751        k->save_config(qbus->parent, f);
2752    }
2753
2754    qemu_put_8s(f, &vdev->status);
2755    qemu_put_8s(f, &vdev->isr);
2756    qemu_put_be16s(f, &vdev->queue_sel);
2757    qemu_put_be32s(f, &guest_features_lo);
2758    qemu_put_be32(f, vdev->config_len);
2759    qemu_put_buffer(f, vdev->config, vdev->config_len);
2760
2761    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2762        if (vdev->vq[i].vring.num == 0)
2763            break;
2764    }
2765
2766    qemu_put_be32(f, i);
2767
2768    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2769        if (vdev->vq[i].vring.num == 0)
2770            break;
2771
2772        qemu_put_be32(f, vdev->vq[i].vring.num);
2773        if (k->has_variable_vring_alignment) {
2774            qemu_put_be32(f, vdev->vq[i].vring.align);
2775        }
2776        /*
2777         * Save desc now; the rest of the ring addresses are saved in
2778         * subsections for VIRTIO-1 devices.
2779         */
2780        qemu_put_be64(f, vdev->vq[i].vring.desc);
2781        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2782        if (k->save_queue) {
2783            k->save_queue(qbus->parent, i, f);
2784        }
2785    }
2786
2787    if (vdc->save != NULL) {
2788        vdc->save(vdev, f);
2789    }
2790
2791    if (vdc->vmsd) {
2792        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2793        if (ret) {
2794            return ret;
2795        }
2796    }
2797
2798    /* Subsections */
2799    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2800}
2801
2802/* A wrapper for use as a VMState .put function */
2803static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2804                              const VMStateField *field, QJSON *vmdesc)
2805{
2806    return virtio_save(VIRTIO_DEVICE(opaque), f);
2807}
2808
2809/* A wrapper for use as a VMState .get function */
2810static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2811                             const VMStateField *field)
2812{
2813    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2814    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2815
2816    return virtio_load(vdev, f, dc->vmsd->version_id);
2817}
2818
2819const VMStateInfo  virtio_vmstate_info = {
2820    .name = "virtio",
2821    .get = virtio_device_get,
2822    .put = virtio_device_put,
2823};
2824
2825static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2826{
2827    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2828    bool bad = (val & ~(vdev->host_features)) != 0;
2829
2830    val &= vdev->host_features;
2831    if (k->set_features) {
2832        k->set_features(vdev, val);
2833    }
2834    vdev->guest_features = val;
2835    return bad ? -1 : 0;
2836}
2837
2838int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2839{
2840    int ret;
2841    /*
2842     * The driver must not attempt to set features after feature negotiation
2843     * has finished.
2844     */
2845    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2846        return -EINVAL;
2847    }
2848    ret = virtio_set_features_nocheck(vdev, val);
2849    if (!ret) {
2850        if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2851            /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2852            int i;
2853            for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2854                if (vdev->vq[i].vring.num != 0) {
2855                    virtio_init_region_cache(vdev, i);
2856                }
2857            }
2858        }
2859
2860        if (!virtio_device_started(vdev, vdev->status) &&
2861            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2862            vdev->start_on_kick = true;
2863        }
2864    }
2865    return ret;
2866}
2867
2868size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes,
2869                                      uint64_t host_features)
2870{
2871    size_t config_size = 0;
2872    int i;
2873
2874    for (i = 0; feature_sizes[i].flags != 0; i++) {
2875        if (host_features & feature_sizes[i].flags) {
2876            config_size = MAX(feature_sizes[i].end, config_size);
2877        }
2878    }
2879
2880    return config_size;
2881}
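
/*
 * Illustrative usage sketch, not part of the original source: devices
 * usually describe how much config space each feature needs and let this
 * helper pick the largest required size.  The feature bits and config
 * struct below are hypothetical, chosen only for the example; endof() is
 * QEMU's offset-plus-size-of-field helper from osdep.h.
 *
 *     static VirtIOFeature example_sizes[] = {
 *         {.flags = 1ULL << EXAMPLE_F_BASIC,
 *          .end = endof(struct ExampleConfig, basic)},
 *         {.flags = 1ULL << EXAMPLE_F_EXTENDED,
 *          .end = endof(struct ExampleConfig, extended)},
 *         {}
 *     };
 *
 *     config_size = virtio_feature_get_config_size(example_sizes,
 *                                                  host_features);
 */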
2882
2883int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2884{
2885    int i, ret;
2886    int32_t config_len;
2887    uint32_t num;
2888    uint32_t features;
2889    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2890    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2891    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2892
2893    /*
2894     * We poison the endianness to ensure it does not get used before
2895     * subsections have been loaded.
2896     */
2897    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
2898
2899    if (k->load_config) {
2900        ret = k->load_config(qbus->parent, f);
2901        if (ret)
2902            return ret;
2903    }
2904
2905    qemu_get_8s(f, &vdev->status);
2906    qemu_get_8s(f, &vdev->isr);
2907    qemu_get_be16s(f, &vdev->queue_sel);
2908    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
2909        return -1;
2910    }
2911    qemu_get_be32s(f, &features);
2912
2913    /*
2914     * Temporarily set guest_features low bits - needed by
2915     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
2916     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
2917     *
2918     * Note: devices should always test host features in future - don't create
2919     * new dependencies like this.
2920     */
2921    vdev->guest_features = features;
2922
2923    config_len = qemu_get_be32(f);
2924
2925    /*
2926     * There are cases where the incoming config can be bigger or smaller
2927     * than what we have; so load what we have space for, and skip
2928     * any excess that's in the stream.
2929     */
2930    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
2931
2932    while (config_len > vdev->config_len) {
2933        qemu_get_byte(f);
2934        config_len--;
2935    }
2936
2937    num = qemu_get_be32(f);
2938
2939    if (num > VIRTIO_QUEUE_MAX) {
2940        error_report("Invalid number of virtqueues: 0x%x", num);
2941        return -1;
2942    }
2943
2944    for (i = 0; i < num; i++) {
2945        vdev->vq[i].vring.num = qemu_get_be32(f);
2946        if (k->has_variable_vring_alignment) {
2947            vdev->vq[i].vring.align = qemu_get_be32(f);
2948        }
2949        vdev->vq[i].vring.desc = qemu_get_be64(f);
2950        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
2951        vdev->vq[i].signalled_used_valid = false;
2952        vdev->vq[i].notification = true;
2953
2954        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
2955            error_report("VQ %d address 0x0 "
2956                         "inconsistent with Host index 0x%x",
2957                         i, vdev->vq[i].last_avail_idx);
2958            return -1;
2959        }
2960        if (k->load_queue) {
2961            ret = k->load_queue(qbus->parent, i, f);
2962            if (ret)
2963                return ret;
2964        }
2965    }
2966
2967    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2968
2969    if (vdc->load != NULL) {
2970        ret = vdc->load(vdev, f, version_id);
2971        if (ret) {
2972            return ret;
2973        }
2974    }
2975
2976    if (vdc->vmsd) {
2977        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
2978        if (ret) {
2979            return ret;
2980        }
2981    }
2982
2983    /* Subsections */
2984    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
2985    if (ret) {
2986        return ret;
2987    }
2988
2989    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
2990        vdev->device_endian = virtio_default_endian();
2991    }
2992
2993    if (virtio_64bit_features_needed(vdev)) {
2994        /*
2995         * Subsection load filled vdev->guest_features.  Run them
2996         * through virtio_set_features_nocheck() to sanity-check them
2997         * against host_features.
2998         */
2999        uint64_t features64 = vdev->guest_features;
3000        if (virtio_set_features_nocheck(vdev, features64) < 0) {
3001            error_report("Features 0x%" PRIx64 " unsupported. "
3002                         "Allowed features: 0x%" PRIx64,
3003                         features64, vdev->host_features);
3004            return -1;
3005        }
3006    } else {
3007        if (virtio_set_features_nocheck(vdev, features) < 0) {
3008            error_report("Features 0x%x unsupported. "
3009                         "Allowed features: 0x%" PRIx64,
3010                         features, vdev->host_features);
3011            return -1;
3012        }
3013    }
3014
3015    if (!virtio_device_started(vdev, vdev->status) &&
3016        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3017        vdev->start_on_kick = true;
3018    }
3019
3020    RCU_READ_LOCK_GUARD();
3021    for (i = 0; i < num; i++) {
3022        if (vdev->vq[i].vring.desc) {
3023            uint16_t nheads;
3024
3025            /*
3026             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3027             * only the region cache needs to be set up.  Legacy devices need
3028             * to calculate used and avail ring addresses based on the desc
3029             * address.
3030             */
3031            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3032                virtio_init_region_cache(vdev, i);
3033            } else {
3034                virtio_queue_update_rings(vdev, i);
3035            }
3036
3037            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3038                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3039                vdev->vq[i].shadow_avail_wrap_counter =
3040                                        vdev->vq[i].last_avail_wrap_counter;
3041                continue;
3042            }
3043
3044            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3045            /* Check it isn't doing strange things with descriptor numbers. */
3046            if (nheads > vdev->vq[i].vring.num) {
3047                error_report("VQ %d size 0x%x Guest index 0x%x "
3048                             "inconsistent with Host index 0x%x: delta 0x%x",
3049                             i, vdev->vq[i].vring.num,
3050                             vring_avail_idx(&vdev->vq[i]),
3051                             vdev->vq[i].last_avail_idx, nheads);
3052                return -1;
3053            }
3054            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3055            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3056
3057            /*
3058             * Some devices migrate VirtQueueElements that have been popped
3059             * from the avail ring but not yet returned to the used ring.
3060             * Since max ring size < UINT16_MAX it's safe to use modulo
3061             * UINT16_MAX + 1 subtraction.
3062             */
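            /*
             * For example (illustrative, not in the original source):
             * after wrapping, last_avail_idx may have been loaded as 3
             * while used_idx is 65533; the 16-bit subtraction
             * (uint16_t)(3 - 65533) yields 6, the number of elements
             * still in flight.
             */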
3063            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3064                                vdev->vq[i].used_idx);
3065            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3066                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3067                             "used_idx 0x%x",
3068                             i, vdev->vq[i].vring.num,
3069                             vdev->vq[i].last_avail_idx,
3070                             vdev->vq[i].used_idx);
3071                return -1;
3072            }
3073        }
3074    }
3075
3076    if (vdc->post_load) {
3077        ret = vdc->post_load(vdev);
3078        if (ret) {
3079            return ret;
3080        }
3081    }
3082
3083    return 0;
3084}
3085
3086void virtio_cleanup(VirtIODevice *vdev)
3087{
3088    qemu_del_vm_change_state_handler(vdev->vmstate);
3089}
3090
3091static void virtio_vmstate_change(void *opaque, int running, RunState state)
3092{
3093    VirtIODevice *vdev = opaque;
3094    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3095    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3096    bool backend_run = running && virtio_device_started(vdev, vdev->status);
3097    vdev->vm_running = running;
3098
3099    if (backend_run) {
3100        virtio_set_status(vdev, vdev->status);
3101    }
3102
3103    if (k->vmstate_change) {
3104        k->vmstate_change(qbus->parent, backend_run);
3105    }
3106
3107    if (!backend_run) {
3108        virtio_set_status(vdev, vdev->status);
3109    }
3110}
3111
3112void virtio_instance_init_common(Object *proxy_obj, void *data,
3113                                 size_t vdev_size, const char *vdev_name)
3114{
3115    DeviceState *vdev = data;
3116
3117    object_initialize_child(proxy_obj, "virtio-backend", vdev, vdev_size,
3118                            vdev_name, &error_abort, NULL);
3119    qdev_alias_all_properties(vdev, proxy_obj);
3120}
3121
3122void virtio_init(VirtIODevice *vdev, const char *name,
3123                 uint16_t device_id, size_t config_size)
3124{
3125    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3126    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3127    int i;
3128    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3129
3130    if (nvectors) {
3131        vdev->vector_queues =
3132            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3133    }
3134
3135    vdev->start_on_kick = false;
3136    vdev->started = false;
3137    vdev->device_id = device_id;
3138    vdev->status = 0;
3139    atomic_set(&vdev->isr, 0);
3140    vdev->queue_sel = 0;
3141    vdev->config_vector = VIRTIO_NO_VECTOR;
3142    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3143    vdev->vm_running = runstate_is_running();
3144    vdev->broken = false;
3145    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3146        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3147        vdev->vq[i].vdev = vdev;
3148        vdev->vq[i].queue_index = i;
3149        vdev->vq[i].host_notifier_enabled = false;
3150    }
3151
3152    vdev->name = name;
3153    vdev->config_len = config_size;
3154    if (vdev->config_len) {
3155        vdev->config = g_malloc0(config_size);
3156    } else {
3157        vdev->config = NULL;
3158    }
3159    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3160            virtio_vmstate_change, vdev);
3161    vdev->device_endian = virtio_default_endian();
3162    vdev->use_guest_notifier_mask = true;
3163}
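
/*
 * Illustrative sketch, not part of the original source, of how a device
 * realize function typically wires these helpers together.  The device
 * ID, config struct and output handler below are hypothetical:
 *
 *     static void example_device_realize(DeviceState *dev, Error **errp)
 *     {
 *         VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 *
 *         virtio_init(vdev, "example", VIRTIO_ID_EXAMPLE,
 *                     sizeof(struct ExampleConfig));
 *         virtio_add_queue(vdev, 128, example_handle_output);
 *     }
 */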
3164
3165hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3166{
3167    return vdev->vq[n].vring.desc;
3168}
3169
3170bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3171{
3172    return virtio_queue_get_desc_addr(vdev, n) != 0;
3173}
3174
3175hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3176{
3177    return vdev->vq[n].vring.avail;
3178}
3179
3180hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3181{
3182    return vdev->vq[n].vring.used;
3183}
3184
3185hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3186{
3187    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3188}
3189
3190hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3191{
3192    int s;
3193
3194    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3195        return sizeof(struct VRingPackedDescEvent);
3196    }
3197
3198    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3199    return offsetof(VRingAvail, ring) +
3200        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3201}
3202
3203hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3204{
3205    int s;
3206
3207    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3208        return sizeof(struct VRingPackedDescEvent);
3209    }
3210
3211    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3212    return offsetof(VRingUsed, ring) +
3213        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3214}
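
/*
 * Worked example, illustrative and not in the original source: for a
 * split ring with 256 entries and VIRTIO_RING_F_EVENT_IDX negotiated,
 * the avail area occupies 2 + 2 + 256 * 2 + 2 = 518 bytes (flags, idx,
 * ring, used_event) and the used area 2 + 2 + 256 * 8 + 2 = 2054 bytes
 * (flags, idx, ring of 8-byte elements, avail_event).
 */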
3215
3216static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3217                                                           int n)
3218{
3219    unsigned int avail, used;
3220
3221    avail = vdev->vq[n].last_avail_idx;
3222    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3223
3224    used = vdev->vq[n].used_idx;
3225    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3226
3227    return avail | used << 16;
3228}
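
/*
 * Illustrative example, not in the original source: the packed-ring
 * state is folded into one 32-bit value with last_avail_idx in bits
 * 0-14, its wrap counter in bit 15, used_idx in bits 16-30 and the used
 * wrap counter in bit 31.  E.g. last_avail_idx = 5 with the avail wrap
 * counter set and used_idx = 3 with the used wrap counter clear is
 * returned as (3 << 16) | 0x8000 | 5 = 0x00038005.
 */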
3229
3230static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3231                                                      int n)
3232{
3233    return vdev->vq[n].last_avail_idx;
3234}
3235
3236unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3237{
3238    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3239        return virtio_queue_packed_get_last_avail_idx(vdev, n);
3240    } else {
3241        return virtio_queue_split_get_last_avail_idx(vdev, n);
3242    }
3243}
3244
3245static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3246                                                   int n, unsigned int idx)
3247{
3248    struct VirtQueue *vq = &vdev->vq[n];
3249
3250    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3251    vq->last_avail_wrap_counter =
3252        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3253    idx >>= 16;
3254    vq->used_idx = idx & 0x7fff;
3255    vq->used_wrap_counter = !!(idx & 0x8000);
3256}
3257
3258static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3259                                                  int n, unsigned int idx)
3260{
3261    vdev->vq[n].last_avail_idx = idx;
3262    vdev->vq[n].shadow_avail_idx = idx;
3263}
3264
3265void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3266                                     unsigned int idx)
3267{
3268    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3269        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3270    } else {
3271        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3272    }
3273}
3274
3275static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3276                                                       int n)
3277{
3278    /* We don't have a reference like avail idx in shared memory */
3279    return;
3280}
3281
3282static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3283                                                      int n)
3284{
3285    RCU_READ_LOCK_GUARD();
3286    if (vdev->vq[n].vring.desc) {
3287        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3288        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3289    }
3290}
3291
3292void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3293{
3294    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3295        virtio_queue_packed_restore_last_avail_idx(vdev, n);
3296    } else {
3297        virtio_queue_split_restore_last_avail_idx(vdev, n);
3298    }
3299}
3300
3301static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3302{
3303    /* used idx was updated through set_last_avail_idx() */
3304    return;
3305}
3306
3307static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3308{
3309    RCU_READ_LOCK_GUARD();
3310    if (vdev->vq[n].vring.desc) {
3311        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3312    }
3313}
3314
3315void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3316{
3317    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3318        return virtio_queue_packed_update_used_idx(vdev, n);
3319    } else {
3320        return virtio_queue_split_update_used_idx(vdev, n);
3321    }
3322}
3323
3324void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3325{
3326    vdev->vq[n].signalled_used_valid = false;
3327}
3328
3329VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3330{
3331    return vdev->vq + n;
3332}
3333
3334uint16_t virtio_get_queue_index(VirtQueue *vq)
3335{
3336    return vq->queue_index;
3337}
3338
3339static void virtio_queue_guest_notifier_read(EventNotifier *n)
3340{
3341    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3342    if (event_notifier_test_and_clear(n)) {
3343        virtio_irq(vq);
3344    }
3345}
3346
3347void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3348                                                bool with_irqfd)
3349{
3350    if (assign && !with_irqfd) {
3351        event_notifier_set_handler(&vq->guest_notifier,
3352                                   virtio_queue_guest_notifier_read);
3353    } else {
3354        event_notifier_set_handler(&vq->guest_notifier, NULL);
3355    }
3356    if (!assign) {
3357        /* Test and clear notifier before closing it,
3358         * in case poll callback didn't have time to run. */
3359        virtio_queue_guest_notifier_read(&vq->guest_notifier);
3360    }
3361}
3362
3363EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3364{
3365    return &vq->guest_notifier;
3366}
3367
3368static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3369{
3370    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3371    if (event_notifier_test_and_clear(n)) {
3372        virtio_queue_notify_aio_vq(vq);
3373    }
3374}
3375
3376static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3377{
3378    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3379
3380    virtio_queue_set_notification(vq, 0);
3381}
3382
3383static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3384{
3385    EventNotifier *n = opaque;
3386    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3387    bool progress;
3388
3389    if (!vq->vring.desc || virtio_queue_empty(vq)) {
3390        return false;
3391    }
3392
3393    progress = virtio_queue_notify_aio_vq(vq);
3394
3395    /* In case the handler function re-enabled notifications */
3396    virtio_queue_set_notification(vq, 0);
3397    return progress;
3398}
3399
3400static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3401{
3402    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3403
3404    /* Caller polls once more after this to catch requests that race with us */
3405    virtio_queue_set_notification(vq, 1);
3406}
3407
3408void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3409                                                VirtIOHandleAIOOutput handle_output)
3410{
3411    if (handle_output) {
3412        vq->handle_aio_output = handle_output;
3413        aio_set_event_notifier(ctx, &vq->host_notifier, true,
3414                               virtio_queue_host_notifier_aio_read,
3415                               virtio_queue_host_notifier_aio_poll);
3416        aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3417                                    virtio_queue_host_notifier_aio_poll_begin,
3418                                    virtio_queue_host_notifier_aio_poll_end);
3419    } else {
3420        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3421        /* Test and clear notifier after disabling event,
3422         * in case poll callback didn't have time to run. */
3423        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3424        vq->handle_aio_output = NULL;
3425    }
3426}
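/*
 * Usage sketch (illustrative; the handler, context and helper names below
 * are hypothetical, not part of this file): a dataplane-style device
 * attaches its queue handler to its own AioContext when it starts and
 * detaches it by passing NULL when it stops:
 *
 *     static bool my_dev_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         // Return true if any requests were processed (progress).
 *         return my_dev_process_queue(vdev, vq);
 *     }
 *
 *     // start:
 *     virtio_queue_aio_set_host_notifier_handler(vq, my_ctx,
 *                                                my_dev_handle_output);
 *     // stop:
 *     virtio_queue_aio_set_host_notifier_handler(vq, my_ctx, NULL);
 */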
3427
3428void virtio_queue_host_notifier_read(EventNotifier *n)
3429{
3430    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3431    if (event_notifier_test_and_clear(n)) {
3432        virtio_queue_notify_vq(vq);
3433    }
3434}
3435
3436EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3437{
3438    return &vq->host_notifier;
3439}
3440
3441void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3442{
3443    vq->host_notifier_enabled = enabled;
3444}
3445
3446int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3447                                      MemoryRegion *mr, bool assign)
3448{
3449    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3450    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3451
3452    if (k->set_host_notifier_mr) {
3453        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3454    }
3455
3456    return -1;
3457}
3458
3459void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3460{
3461    g_free(vdev->bus_name);
3462    vdev->bus_name = g_strdup(bus_name);
3463}
3464
3465void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3466{
3467    va_list ap;
3468
3469    va_start(ap, fmt);
3470    error_vreport(fmt, ap);
3471    va_end(ap);
3472
3473    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3474        vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3475        virtio_notify_config(vdev);
3476    }
3477
3478    vdev->broken = true;
3479}
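/*
 * Usage sketch (illustrative; the device name and message are hypothetical):
 * a request handler that hits a malformed descriptor chain marks the device
 * broken instead of asserting or exiting:
 *
 *     if (elem->out_num < 1) {
 *         virtio_error(vdev, "mydev: request lacks an out descriptor");
 *         virtqueue_detach_element(vq, elem, 0);
 *         g_free(elem);
 *         return;
 *     }
 *
 * For a VERSION_1 device this also raises VIRTIO_CONFIG_S_NEEDS_RESET and
 * sends a config interrupt, as implemented above.
 */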
3480
3481static void virtio_memory_listener_commit(MemoryListener *listener)
3482{
3483    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3484    int i;
3485
3486    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3487        if (vdev->vq[i].vring.num == 0) {
3488            break;
3489        }
3490        virtio_init_region_cache(vdev, i);
3491    }
3492}
3493
3494static void virtio_device_realize(DeviceState *dev, Error **errp)
3495{
3496    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3497    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3498    Error *err = NULL;
3499
3500    /* Devices should either use vmsd or the load/save methods */
3501    assert(!vdc->vmsd || !vdc->load);
3502
3503    if (vdc->realize != NULL) {
3504        vdc->realize(dev, &err);
3505        if (err != NULL) {
3506            error_propagate(errp, err);
3507            return;
3508        }
3509    }
3510
3511    virtio_bus_device_plugged(vdev, &err);
3512    if (err != NULL) {
3513        error_propagate(errp, err);
3514        vdc->unrealize(dev, NULL);
3515        return;
3516    }
3517
3518    vdev->listener.commit = virtio_memory_listener_commit;
3519    memory_listener_register(&vdev->listener, vdev->dma_as);
3520}
3521
3522static void virtio_device_unrealize(DeviceState *dev, Error **errp)
3523{
3524    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3525    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3526    Error *err = NULL;
3527
3528    virtio_bus_device_unplugged(vdev);
3529
3530    if (vdc->unrealize != NULL) {
3531        vdc->unrealize(dev, &err);
3532        if (err != NULL) {
3533            error_propagate(errp, err);
3534            return;
3535        }
3536    }
3537
3538    g_free(vdev->bus_name);
3539    vdev->bus_name = NULL;
3540}
3541
3542static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3543{
3544    int i;
3545    if (!vdev->vq) {
3546        return;
3547    }
3548
3549    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3550        if (vdev->vq[i].vring.num == 0) {
3551            break;
3552        }
3553        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3554    }
3555    g_free(vdev->vq);
3556}
3557
3558static void virtio_device_instance_finalize(Object *obj)
3559{
3560    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3561
3562    memory_listener_unregister(&vdev->listener);
3563    virtio_device_free_virtqueues(vdev);
3564
3565    g_free(vdev->config);
3566    g_free(vdev->vector_queues);
3567}
3568
3569static Property virtio_properties[] = {
3570    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3571    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3572    DEFINE_PROP_END_OF_LIST(),
3573};
3574
3575static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3576{
3577    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3578    int i, n, r, err;
3579
3580    memory_region_transaction_begin();
3581    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3582        VirtQueue *vq = &vdev->vq[n];
3583        if (!virtio_queue_get_num(vdev, n)) {
3584            continue;
3585        }
3586        r = virtio_bus_set_host_notifier(qbus, n, true);
3587        if (r < 0) {
3588            err = r;
3589            goto assign_error;
3590        }
3591        event_notifier_set_handler(&vq->host_notifier,
3592                                   virtio_queue_host_notifier_read);
3593    }
3594
3595    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3596        /* Kick right away to begin processing requests already in vring */
3597        VirtQueue *vq = &vdev->vq[n];
3598        if (!vq->vring.num) {
3599            continue;
3600        }
3601        event_notifier_set(&vq->host_notifier);
3602    }
3603    memory_region_transaction_commit();
3604    return 0;
3605
3606assign_error:
3607    i = n; /* save n for a second iteration after transaction is committed. */
3608    while (--n >= 0) {
3609        VirtQueue *vq = &vdev->vq[n];
3610        if (!virtio_queue_get_num(vdev, n)) {
3611            continue;
3612        }
3613
3614        event_notifier_set_handler(&vq->host_notifier, NULL);
3615        r = virtio_bus_set_host_notifier(qbus, n, false);
3616        assert(r >= 0);
3617    }
3618    memory_region_transaction_commit();
3619
3620    while (--i >= 0) {
3621        if (!virtio_queue_get_num(vdev, i)) {
3622            continue;
3623        }
3624        virtio_bus_cleanup_host_notifier(qbus, i);
3625    }
3626    return err;
3627}
3628
3629int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3630{
3631    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3632    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3633
3634    return virtio_bus_start_ioeventfd(vbus);
3635}
3636
3637static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3638{
3639    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3640    int n, r;
3641
3642    memory_region_transaction_begin();
3643    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3644        VirtQueue *vq = &vdev->vq[n];
3645
3646        if (!virtio_queue_get_num(vdev, n)) {
3647            continue;
3648        }
3649        event_notifier_set_handler(&vq->host_notifier, NULL);
3650        r = virtio_bus_set_host_notifier(qbus, n, false);
3651        assert(r >= 0);
3652    }
3653    memory_region_transaction_commit();
3654
3655    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3656        if (!virtio_queue_get_num(vdev, n)) {
3657            continue;
3658        }
3659        virtio_bus_cleanup_host_notifier(qbus, n);
3660    }
3661}
3662
3663int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3664{
3665    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3666    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3667
3668    return virtio_bus_grab_ioeventfd(vbus);
3669}
3670
3671void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3672{
3673    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3674    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3675
3676    virtio_bus_release_ioeventfd(vbus);
3677}
3678
3679static void virtio_device_class_init(ObjectClass *klass, void *data)
3680{
3681    /* Set the class defaults here; concrete devices override them in their own class_init. */
3682    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3683    DeviceClass *dc = DEVICE_CLASS(klass);
3684
3685    dc->realize = virtio_device_realize;
3686    dc->unrealize = virtio_device_unrealize;
3687    dc->bus_type = TYPE_VIRTIO_BUS;
3688    dc->props = virtio_properties;
3689    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3690    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3691
3692    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3693}
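/*
 * Illustrative sketch (type and callback names are hypothetical): a concrete
 * virtio device fills in the VirtioDeviceClass hooks in its own class_init,
 * building on the defaults set above:
 *
 *     static void my_virtio_class_init(ObjectClass *klass, void *data)
 *     {
 *         DeviceClass *dc = DEVICE_CLASS(klass);
 *         VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 *
 *         dc->props = my_virtio_properties;
 *         vdc->realize = my_virtio_device_realize;
 *         vdc->unrealize = my_virtio_device_unrealize;
 *         vdc->get_features = my_virtio_get_features;
 *         vdc->vmsd = &vmstate_my_virtio_device; // or legacy load/save, not both
 *     }
 *
 * The assert in virtio_device_realize() enforces that vdc->vmsd and the
 * legacy vdc->load callback are not used together.
 */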
3694
3695bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3696{
3697    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3698    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3699
3700    return virtio_bus_ioeventfd_enabled(vbus);
3701}
3702
3703static const TypeInfo virtio_device_info = {
3704    .name = TYPE_VIRTIO_DEVICE,
3705    .parent = TYPE_DEVICE,
3706    .instance_size = sizeof(VirtIODevice),
3707    .class_init = virtio_device_class_init,
3708    .instance_finalize = virtio_device_instance_finalize,
3709    .abstract = true,
3710    .class_size = sizeof(VirtioDeviceClass),
3711};
3712
3713static void virtio_register_types(void)
3714{
3715    type_register_static(&virtio_device_info);
3716}
3717
3718type_init(virtio_register_types)
3719