qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "cpu.h"
  17#include "trace.h"
  18#include "qemu/error-report.h"
  19#include "qemu/log.h"
  20#include "qemu/main-loop.h"
  21#include "qemu/module.h"
  22#include "hw/virtio/virtio.h"
  23#include "migration/qemu-file-types.h"
  24#include "qemu/atomic.h"
  25#include "hw/virtio/virtio-bus.h"
  26#include "hw/qdev-properties.h"
  27#include "hw/virtio/virtio-access.h"
  28#include "sysemu/dma.h"
  29#include "sysemu/runstate.h"
  30#include "standard-headers/linux/virtio_ids.h"
  31
  32/*
  33 * The alignment to use between consumer and producer parts of vring.
   34 * We use the x86 page size. This is the default, used by transports like PCI
  35 * which don't provide a means for the guest to tell the host the alignment.
  36 */
  37#define VIRTIO_PCI_VRING_ALIGN         4096
  38
  39typedef struct VRingDesc
  40{
  41    uint64_t addr;
  42    uint32_t len;
  43    uint16_t flags;
  44    uint16_t next;
  45} VRingDesc;
  46
  47typedef struct VRingPackedDesc {
  48    uint64_t addr;
  49    uint32_t len;
  50    uint16_t id;
  51    uint16_t flags;
  52} VRingPackedDesc;
  53
  54typedef struct VRingAvail
  55{
  56    uint16_t flags;
  57    uint16_t idx;
  58    uint16_t ring[];
  59} VRingAvail;
  60
  61typedef struct VRingUsedElem
  62{
  63    uint32_t id;
  64    uint32_t len;
  65} VRingUsedElem;
  66
  67typedef struct VRingUsed
  68{
  69    uint16_t flags;
  70    uint16_t idx;
  71    VRingUsedElem ring[];
  72} VRingUsed;
  73
  74typedef struct VRingMemoryRegionCaches {
  75    struct rcu_head rcu;
  76    MemoryRegionCache desc;
  77    MemoryRegionCache avail;
  78    MemoryRegionCache used;
  79} VRingMemoryRegionCaches;
  80
  81typedef struct VRing
  82{
  83    unsigned int num;
  84    unsigned int num_default;
  85    unsigned int align;
  86    hwaddr desc;
  87    hwaddr avail;
  88    hwaddr used;
  89    VRingMemoryRegionCaches *caches;
  90} VRing;
  91
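/*
 * Event suppression structure for the packed ring (virtio 1.1).  The same
 * layout is used for the driver and the device event areas: "flags" selects
 * the ENABLE/DISABLE/DESC notification mode and, in DESC mode, "off_wrap"
 * carries the descriptor ring offset in bits 0-14 and the wrap counter in
 * bit 15.
 */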
  92typedef struct VRingPackedDescEvent {
  93    uint16_t off_wrap;
  94    uint16_t flags;
   95} VRingPackedDescEvent;
  96
  97struct VirtQueue
  98{
  99    VRing vring;
 100    VirtQueueElement *used_elems;
 101
 102    /* Next head to pop */
 103    uint16_t last_avail_idx;
 104    bool last_avail_wrap_counter;
 105
 106    /* Last avail_idx read from VQ. */
 107    uint16_t shadow_avail_idx;
 108    bool shadow_avail_wrap_counter;
 109
 110    uint16_t used_idx;
 111    bool used_wrap_counter;
 112
 113    /* Last used index value we have signalled on */
 114    uint16_t signalled_used;
 115
  116    /* Whether signalled_used is valid */
 117    bool signalled_used_valid;
 118
 119    /* Notification enabled? */
 120    bool notification;
 121
 122    uint16_t queue_index;
 123
 124    unsigned int inuse;
 125
 126    uint16_t vector;
 127    VirtIOHandleOutput handle_output;
 128    VirtIODevice *vdev;
 129    EventNotifier guest_notifier;
 130    EventNotifier host_notifier;
 131    bool host_notifier_enabled;
 132    QLIST_ENTRY(VirtQueue) node;
 133};
 134
 135/* Called within call_rcu().  */
 136static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
 137{
 138    assert(caches != NULL);
 139    address_space_cache_destroy(&caches->desc);
 140    address_space_cache_destroy(&caches->avail);
 141    address_space_cache_destroy(&caches->used);
 142    g_free(caches);
 143}
 144
 145static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
 146{
 147    VRingMemoryRegionCaches *caches;
 148
 149    caches = qatomic_read(&vq->vring.caches);
 150    qatomic_rcu_set(&vq->vring.caches, NULL);
 151    if (caches) {
 152        call_rcu(caches, virtio_free_region_cache, rcu);
 153    }
 154}
 155
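/*
 * Build MemoryRegionCaches for the descriptor, avail and used rings of
 * queue @n and publish them with RCU.  Readers fetch the caches with
 * vring_get_region_caches() under rcu_read_lock(); the previous caches, if
 * any, are released through call_rcu() once existing readers are done.
 */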
 156static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 157{
 158    VirtQueue *vq = &vdev->vq[n];
 159    VRingMemoryRegionCaches *old = vq->vring.caches;
 160    VRingMemoryRegionCaches *new = NULL;
 161    hwaddr addr, size;
 162    int64_t len;
 163    bool packed;
 164
 165
 166    addr = vq->vring.desc;
 167    if (!addr) {
 168        goto out_no_cache;
 169    }
 170    new = g_new0(VRingMemoryRegionCaches, 1);
 171    size = virtio_queue_get_desc_size(vdev, n);
  172    packed = virtio_vdev_has_feature(vq->vdev,
  173                                     VIRTIO_F_RING_PACKED);
 174    len = address_space_cache_init(&new->desc, vdev->dma_as,
 175                                   addr, size, packed);
 176    if (len < size) {
 177        virtio_error(vdev, "Cannot map desc");
 178        goto err_desc;
 179    }
 180
 181    size = virtio_queue_get_used_size(vdev, n);
 182    len = address_space_cache_init(&new->used, vdev->dma_as,
 183                                   vq->vring.used, size, true);
 184    if (len < size) {
 185        virtio_error(vdev, "Cannot map used");
 186        goto err_used;
 187    }
 188
 189    size = virtio_queue_get_avail_size(vdev, n);
 190    len = address_space_cache_init(&new->avail, vdev->dma_as,
 191                                   vq->vring.avail, size, false);
 192    if (len < size) {
 193        virtio_error(vdev, "Cannot map avail");
 194        goto err_avail;
 195    }
 196
 197    qatomic_rcu_set(&vq->vring.caches, new);
 198    if (old) {
 199        call_rcu(old, virtio_free_region_cache, rcu);
 200    }
 201    return;
 202
 203err_avail:
 204    address_space_cache_destroy(&new->avail);
 205err_used:
 206    address_space_cache_destroy(&new->used);
 207err_desc:
 208    address_space_cache_destroy(&new->desc);
 209out_no_cache:
 210    g_free(new);
 211    virtio_virtqueue_reset_region_cache(vq);
 212}
 213
 214/* virt queue functions */
 215void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 216{
 217    VRing *vring = &vdev->vq[n].vring;
 218
 219    if (!vring->num || !vring->desc || !vring->align) {
  220        /* not yet set up -> nothing to do */
 221        return;
 222    }
 223    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 224    vring->used = vring_align(vring->avail +
 225                              offsetof(VRingAvail, ring[vring->num]),
 226                              vring->align);
 227    virtio_init_region_cache(vdev, n);
 228}
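
/*
 * Worked example of the split-ring layout computed above, assuming
 * num = 256 and the default 4096-byte alignment: the descriptor table is
 * 256 * sizeof(VRingDesc) = 4096 bytes, so avail = desc + 4096; the avail
 * header plus ring take offsetof(VRingAvail, ring[256]) = 4 + 2 * 256 =
 * 516 bytes, and the used ring is rounded up to the next alignment
 * boundary, giving used = desc + 8192.
 */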
 229
 230/* Called within rcu_read_lock().  */
 231static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 232                                  MemoryRegionCache *cache, int i)
 233{
 234    address_space_read_cached(cache, i * sizeof(VRingDesc),
 235                              desc, sizeof(VRingDesc));
 236    virtio_tswap64s(vdev, &desc->addr);
 237    virtio_tswap32s(vdev, &desc->len);
 238    virtio_tswap16s(vdev, &desc->flags);
 239    virtio_tswap16s(vdev, &desc->next);
 240}
 241
 242static void vring_packed_event_read(VirtIODevice *vdev,
 243                                    MemoryRegionCache *cache,
 244                                    VRingPackedDescEvent *e)
 245{
 246    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
 247    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 248
 249    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
 250    /* Make sure flags is seen before off_wrap */
 251    smp_rmb();
 252    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
  253    /* Both fields are already host-endian after virtio_lduw_phys_cached(). */
 254}
 255
 256static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 257                                        MemoryRegionCache *cache,
 258                                        uint16_t off_wrap)
 259{
 260    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 261
 262    virtio_stw_phys_cached(vdev, cache, off, off_wrap);
 263    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 264}
 265
 266static void vring_packed_flags_write(VirtIODevice *vdev,
 267                                     MemoryRegionCache *cache, uint16_t flags)
 268{
 269    hwaddr off = offsetof(VRingPackedDescEvent, flags);
 270
 271    virtio_stw_phys_cached(vdev, cache, off, flags);
 272    address_space_cache_invalidate(cache, off, sizeof(flags));
 273}
 274
 275/* Called within rcu_read_lock().  */
 276static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 277{
 278    return qatomic_rcu_read(&vq->vring.caches);
 279}
 280
 281/* Called within rcu_read_lock().  */
 282static inline uint16_t vring_avail_flags(VirtQueue *vq)
 283{
 284    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 285    hwaddr pa = offsetof(VRingAvail, flags);
 286
 287    if (!caches) {
 288        return 0;
 289    }
 290
 291    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 292}
 293
 294/* Called within rcu_read_lock().  */
 295static inline uint16_t vring_avail_idx(VirtQueue *vq)
 296{
 297    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 298    hwaddr pa = offsetof(VRingAvail, idx);
 299
 300    if (!caches) {
 301        return 0;
 302    }
 303
 304    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 305    return vq->shadow_avail_idx;
 306}
 307
 308/* Called within rcu_read_lock().  */
 309static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 310{
 311    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 312    hwaddr pa = offsetof(VRingAvail, ring[i]);
 313
 314    if (!caches) {
 315        return 0;
 316    }
 317
 318    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 319}
 320
 321/* Called within rcu_read_lock().  */
 322static inline uint16_t vring_get_used_event(VirtQueue *vq)
 323{
 324    return vring_avail_ring(vq, vq->vring.num);
 325}
 326
 327/* Called within rcu_read_lock().  */
 328static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 329                                    int i)
 330{
 331    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 332    hwaddr pa = offsetof(VRingUsed, ring[i]);
 333
 334    if (!caches) {
 335        return;
 336    }
 337
 338    virtio_tswap32s(vq->vdev, &uelem->id);
 339    virtio_tswap32s(vq->vdev, &uelem->len);
 340    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
 341    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
 342}
 343
 344/* Called within rcu_read_lock().  */
 345static uint16_t vring_used_idx(VirtQueue *vq)
 346{
 347    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 348    hwaddr pa = offsetof(VRingUsed, idx);
 349
 350    if (!caches) {
 351        return 0;
 352    }
 353
 354    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 355}
 356
 357/* Called within rcu_read_lock().  */
 358static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 359{
 360    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 361    hwaddr pa = offsetof(VRingUsed, idx);
 362
 363    if (caches) {
 364        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 365        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 366    }
 367
 368    vq->used_idx = val;
 369}
 370
 371/* Called within rcu_read_lock().  */
 372static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 373{
 374    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 375    VirtIODevice *vdev = vq->vdev;
 376    hwaddr pa = offsetof(VRingUsed, flags);
 377    uint16_t flags;
 378
 379    if (!caches) {
 380        return;
 381    }
 382
 383    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 384    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
 385    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 386}
 387
 388/* Called within rcu_read_lock().  */
 389static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 390{
 391    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 392    VirtIODevice *vdev = vq->vdev;
 393    hwaddr pa = offsetof(VRingUsed, flags);
 394    uint16_t flags;
 395
 396    if (!caches) {
 397        return;
 398    }
 399
 400    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 401    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
 402    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 403}
 404
 405/* Called within rcu_read_lock().  */
 406static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 407{
 408    VRingMemoryRegionCaches *caches;
 409    hwaddr pa;
 410    if (!vq->notification) {
 411        return;
 412    }
 413
 414    caches = vring_get_region_caches(vq);
 415    if (!caches) {
 416        return;
 417    }
 418
 419    pa = offsetof(VRingUsed, ring[vq->vring.num]);
 420    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 421    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 422}
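
/*
 * With VIRTIO_RING_F_EVENT_IDX the device publishes its "avail event" index
 * in the extra uint16_t slot after the used ring (used->ring[num], written
 * above): the guest only has to kick again once its avail index moves past
 * that value.  The symmetric "used event" slot after the avail ring is read
 * by vring_get_used_event() and limits interrupts in the other direction.
 */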
 423
 424static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
 425{
 426    RCU_READ_LOCK_GUARD();
 427
 428    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 429        vring_set_avail_event(vq, vring_avail_idx(vq));
 430    } else if (enable) {
 431        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 432    } else {
 433        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 434    }
 435    if (enable) {
 436        /* Expose avail event/used flags before caller checks the avail idx. */
 437        smp_mb();
 438    }
 439}
 440
 441static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
 442{
 443    uint16_t off_wrap;
 444    VRingPackedDescEvent e;
 445    VRingMemoryRegionCaches *caches;
 446
 447    RCU_READ_LOCK_GUARD();
 448    caches = vring_get_region_caches(vq);
 449    if (!caches) {
 450        return;
 451    }
 452
 453    vring_packed_event_read(vq->vdev, &caches->used, &e);
 454
 455    if (!enable) {
 456        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
 457    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 458        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
 459        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
  460        /* Make sure off_wrap is written before flags */
 461        smp_wmb();
 462        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
 463    } else {
 464        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
 465    }
 466
 467    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
 468    if (enable) {
 469        /* Expose avail event/used flags before caller checks the avail idx. */
 470        smp_mb();
 471    }
 472}
 473
 474bool virtio_queue_get_notification(VirtQueue *vq)
 475{
 476    return vq->notification;
 477}
 478
 479void virtio_queue_set_notification(VirtQueue *vq, int enable)
 480{
 481    vq->notification = enable;
 482
 483    if (!vq->vring.desc) {
 484        return;
 485    }
 486
 487    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 488        virtio_queue_packed_set_notification(vq, enable);
 489    } else {
 490        virtio_queue_split_set_notification(vq, enable);
 491    }
 492}
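
/*
 * Minimal sketch of how a device implementation typically combines the
 * helpers above to drain a queue without losing guest kicks.  Here
 * process_buffer() is a placeholder for device-specific work, and vdev, vq
 * and len (bytes written into the element's in buffers) are assumed to be
 * in scope:
 *
 *     VirtQueueElement *elem;
 *
 *     for (;;) {
 *         virtio_queue_set_notification(vq, 0);
 *         while ((elem = virtqueue_pop(vq, sizeof(*elem)))) {
 *             process_buffer(elem);
 *             virtqueue_push(vq, elem, len);
 *             virtio_notify(vdev, vq);
 *             g_free(elem);
 *         }
 *         virtio_queue_set_notification(vq, 1);
 *         if (virtio_queue_empty(vq)) {
 *             break;
 *         }
 *     }
 */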
 493
 494int virtio_queue_ready(VirtQueue *vq)
 495{
 496    return vq->vring.avail != 0;
 497}
 498
 499static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 500                                         uint16_t *flags,
 501                                         MemoryRegionCache *cache,
 502                                         int i)
 503{
 504    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 505
 506    *flags = virtio_lduw_phys_cached(vdev, cache, off);
 507}
 508
 509static void vring_packed_desc_read(VirtIODevice *vdev,
 510                                   VRingPackedDesc *desc,
 511                                   MemoryRegionCache *cache,
 512                                   int i, bool strict_order)
 513{
 514    hwaddr off = i * sizeof(VRingPackedDesc);
 515
 516    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
 517
 518    if (strict_order) {
  519        /* Make sure flags is read before the rest of the fields. */
 520        smp_rmb();
 521    }
 522
 523    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
 524                              &desc->addr, sizeof(desc->addr));
 525    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
 526                              &desc->id, sizeof(desc->id));
 527    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
 528                              &desc->len, sizeof(desc->len));
 529    virtio_tswap64s(vdev, &desc->addr);
 530    virtio_tswap16s(vdev, &desc->id);
 531    virtio_tswap32s(vdev, &desc->len);
 532}
 533
 534static void vring_packed_desc_write_data(VirtIODevice *vdev,
 535                                         VRingPackedDesc *desc,
 536                                         MemoryRegionCache *cache,
 537                                         int i)
 538{
 539    hwaddr off_id = i * sizeof(VRingPackedDesc) +
 540                    offsetof(VRingPackedDesc, id);
 541    hwaddr off_len = i * sizeof(VRingPackedDesc) +
 542                    offsetof(VRingPackedDesc, len);
 543
 544    virtio_tswap32s(vdev, &desc->len);
 545    virtio_tswap16s(vdev, &desc->id);
 546    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
 547    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
 548    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
 549    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
 550}
 551
 552static void vring_packed_desc_write_flags(VirtIODevice *vdev,
 553                                          VRingPackedDesc *desc,
 554                                          MemoryRegionCache *cache,
 555                                          int i)
 556{
 557    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 558
 559    virtio_stw_phys_cached(vdev, cache, off, desc->flags);
 560    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
 561}
 562
 563static void vring_packed_desc_write(VirtIODevice *vdev,
 564                                    VRingPackedDesc *desc,
 565                                    MemoryRegionCache *cache,
 566                                    int i, bool strict_order)
 567{
 568    vring_packed_desc_write_data(vdev, desc, cache, i);
 569    if (strict_order) {
  570        /* Make sure data is written before flags. */
 571        smp_wmb();
 572    }
 573    vring_packed_desc_write_flags(vdev, desc, cache, i);
 574}
 575
 576static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
 577{
 578    bool avail, used;
 579
 580    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
 581    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
 582    return (avail != used) && (avail == wrap_counter);
 583}
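
/*
 * In the packed ring every descriptor carries an AVAIL and a USED bit.  The
 * driver makes a descriptor available by setting AVAIL to its current wrap
 * counter and USED to the opposite value; the device marks it used by
 * setting both bits to its own wrap counter.  For example, on the first
 * pass (wrap_counter == 1) an available descriptor reads AVAIL=1/USED=0,
 * and after the ring wraps (wrap_counter == 0) it reads AVAIL=0/USED=1.
 */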
 584
 585/* Fetch avail_idx from VQ memory only when we really need to know if
  586 * the guest has added some buffers.
 587 * Called within rcu_read_lock().  */
 588static int virtio_queue_empty_rcu(VirtQueue *vq)
 589{
 590    if (virtio_device_disabled(vq->vdev)) {
 591        return 1;
 592    }
 593
 594    if (unlikely(!vq->vring.avail)) {
 595        return 1;
 596    }
 597
 598    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 599        return 0;
 600    }
 601
 602    return vring_avail_idx(vq) == vq->last_avail_idx;
 603}
 604
 605static int virtio_queue_split_empty(VirtQueue *vq)
 606{
 607    bool empty;
 608
 609    if (virtio_device_disabled(vq->vdev)) {
 610        return 1;
 611    }
 612
 613    if (unlikely(!vq->vring.avail)) {
 614        return 1;
 615    }
 616
 617    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 618        return 0;
 619    }
 620
 621    RCU_READ_LOCK_GUARD();
 622    empty = vring_avail_idx(vq) == vq->last_avail_idx;
 623    return empty;
 624}
 625
 626/* Called within rcu_read_lock().  */
 627static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
 628{
 629    struct VRingPackedDesc desc;
 630    VRingMemoryRegionCaches *cache;
 631
 632    if (unlikely(!vq->vring.desc)) {
 633        return 1;
 634    }
 635
 636    cache = vring_get_region_caches(vq);
 637    if (!cache) {
 638        return 1;
 639    }
 640
 641    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
 642                                 vq->last_avail_idx);
 643
 644    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
 645}
 646
 647static int virtio_queue_packed_empty(VirtQueue *vq)
 648{
 649    RCU_READ_LOCK_GUARD();
 650    return virtio_queue_packed_empty_rcu(vq);
 651}
 652
 653int virtio_queue_empty(VirtQueue *vq)
 654{
 655    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 656        return virtio_queue_packed_empty(vq);
 657    } else {
 658        return virtio_queue_split_empty(vq);
 659    }
 660}
 661
 662static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
 663                               unsigned int len)
 664{
 665    AddressSpace *dma_as = vq->vdev->dma_as;
 666    unsigned int offset;
 667    int i;
 668
 669    offset = 0;
 670    for (i = 0; i < elem->in_num; i++) {
 671        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 672
 673        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
 674                         elem->in_sg[i].iov_len,
 675                         DMA_DIRECTION_FROM_DEVICE, size);
 676
 677        offset += size;
 678    }
 679
  680    for (i = 0; i < elem->out_num; i++) {
  681        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
  682                         elem->out_sg[i].iov_len,
  683                         DMA_DIRECTION_TO_DEVICE, elem->out_sg[i].iov_len);
  684    }
  685}
 686
 687/* virtqueue_detach_element:
 688 * @vq: The #VirtQueue
 689 * @elem: The #VirtQueueElement
 690 * @len: number of bytes written
 691 *
 692 * Detach the element from the virtqueue.  This function is suitable for device
 693 * reset or other situations where a #VirtQueueElement is simply freed and will
 694 * not be pushed or discarded.
 695 */
 696void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
 697                              unsigned int len)
 698{
 699    vq->inuse -= elem->ndescs;
 700    virtqueue_unmap_sg(vq, elem, len);
 701}
 702
 703static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
 704{
 705    vq->last_avail_idx -= num;
 706}
 707
 708static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
 709{
 710    if (vq->last_avail_idx < num) {
 711        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
 712        vq->last_avail_wrap_counter ^= 1;
 713    } else {
 714        vq->last_avail_idx -= num;
 715    }
 716}
 717
 718/* virtqueue_unpop:
 719 * @vq: The #VirtQueue
 720 * @elem: The #VirtQueueElement
 721 * @len: number of bytes written
 722 *
 723 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 724 * call to virtqueue_pop() will refetch the element.
 725 */
 726void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
 727                     unsigned int len)
 728{
 729
 730    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 731        virtqueue_packed_rewind(vq, 1);
 732    } else {
 733        virtqueue_split_rewind(vq, 1);
 734    }
 735
 736    virtqueue_detach_element(vq, elem, len);
 737}
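
/*
 * Sketch of a typical error-path use of virtqueue_unpop() for a device that
 * cannot handle a request right now and wants it to be re-fetched later
 * (device_busy() is a placeholder):
 *
 *     elem = virtqueue_pop(vq, sizeof(*elem));
 *     if (elem && device_busy()) {
 *         virtqueue_unpop(vq, elem, 0);
 *         g_free(elem);
 *         return;
 *     }
 */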
 738
 739/* virtqueue_rewind:
 740 * @vq: The #VirtQueue
 741 * @num: Number of elements to push back
 742 *
 743 * Pretend that elements weren't popped from the virtqueue.  The next
 744 * virtqueue_pop() will refetch the oldest element.
 745 *
 746 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 747 *
 748 * Returns: true on success, false if @num is greater than the number of in use
 749 * elements.
 750 */
 751bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 752{
 753    if (num > vq->inuse) {
 754        return false;
 755    }
 756
 757    vq->inuse -= num;
 758    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 759        virtqueue_packed_rewind(vq, num);
 760    } else {
 761        virtqueue_split_rewind(vq, num);
 762    }
 763    return true;
 764}
 765
 766static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 767                    unsigned int len, unsigned int idx)
 768{
 769    VRingUsedElem uelem;
 770
 771    if (unlikely(!vq->vring.used)) {
 772        return;
 773    }
 774
 775    idx = (idx + vq->used_idx) % vq->vring.num;
 776
 777    uelem.id = elem->index;
 778    uelem.len = len;
 779    vring_used_write(vq, &uelem, idx);
 780}
 781
 782static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
 783                                  unsigned int len, unsigned int idx)
 784{
 785    vq->used_elems[idx].index = elem->index;
 786    vq->used_elems[idx].len = len;
 787    vq->used_elems[idx].ndescs = elem->ndescs;
 788}
 789
 790static void virtqueue_packed_fill_desc(VirtQueue *vq,
 791                                       const VirtQueueElement *elem,
 792                                       unsigned int idx,
 793                                       bool strict_order)
 794{
 795    uint16_t head;
 796    VRingMemoryRegionCaches *caches;
 797    VRingPackedDesc desc = {
 798        .id = elem->index,
 799        .len = elem->len,
 800    };
 801    bool wrap_counter = vq->used_wrap_counter;
 802
 803    if (unlikely(!vq->vring.desc)) {
 804        return;
 805    }
 806
 807    head = vq->used_idx + idx;
 808    if (head >= vq->vring.num) {
 809        head -= vq->vring.num;
 810        wrap_counter ^= 1;
 811    }
 812    if (wrap_counter) {
 813        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
 814        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
 815    } else {
 816        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
 817        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
 818    }
 819
 820    caches = vring_get_region_caches(vq);
 821    if (!caches) {
 822        return;
 823    }
 824
 825    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
 826}
 827
 828/* Called within rcu_read_lock().  */
 829void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 830                    unsigned int len, unsigned int idx)
 831{
 832    trace_virtqueue_fill(vq, elem, len, idx);
 833
 834    virtqueue_unmap_sg(vq, elem, len);
 835
 836    if (virtio_device_disabled(vq->vdev)) {
 837        return;
 838    }
 839
 840    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 841        virtqueue_packed_fill(vq, elem, len, idx);
 842    } else {
 843        virtqueue_split_fill(vq, elem, len, idx);
 844    }
 845}
 846
 847/* Called within rcu_read_lock().  */
 848static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
 849{
 850    uint16_t old, new;
 851
 852    if (unlikely(!vq->vring.used)) {
 853        return;
 854    }
 855
 856    /* Make sure buffer is written before we update index. */
 857    smp_wmb();
 858    trace_virtqueue_flush(vq, count);
 859    old = vq->used_idx;
 860    new = old + count;
 861    vring_used_idx_set(vq, new);
 862    vq->inuse -= count;
 863    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
 864        vq->signalled_used_valid = false;
 865}
 866
 867static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
 868{
 869    unsigned int i, ndescs = 0;
 870
 871    if (unlikely(!vq->vring.desc)) {
 872        return;
 873    }
 874
 875    for (i = 1; i < count; i++) {
 876        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
 877        ndescs += vq->used_elems[i].ndescs;
 878    }
 879    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
 880    ndescs += vq->used_elems[0].ndescs;
 881
 882    vq->inuse -= ndescs;
 883    vq->used_idx += ndescs;
 884    if (vq->used_idx >= vq->vring.num) {
 885        vq->used_idx -= vq->vring.num;
 886        vq->used_wrap_counter ^= 1;
 887        vq->signalled_used_valid = false;
 888    }
 889}
 890
 891void virtqueue_flush(VirtQueue *vq, unsigned int count)
 892{
 893    if (virtio_device_disabled(vq->vdev)) {
 894        vq->inuse -= count;
 895        return;
 896    }
 897
 898    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 899        virtqueue_packed_flush(vq, count);
 900    } else {
 901        virtqueue_split_flush(vq, count);
 902    }
 903}
 904
 905void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 906                    unsigned int len)
 907{
 908    RCU_READ_LOCK_GUARD();
 909    virtqueue_fill(vq, elem, len, 0);
 910    virtqueue_flush(vq, 1);
 911}
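
/*
 * virtqueue_push() completes a single element: it is equivalent to
 * virtqueue_fill(vq, elem, len, 0) followed by virtqueue_flush(vq, 1).
 * Devices that complete several elements at once can batch the updates,
 * e.g. (sketch with placeholder elements and lengths, with the RCU read
 * lock held as virtqueue_fill() requires):
 *
 *     virtqueue_fill(vq, elem_a, len_a, 0);
 *     virtqueue_fill(vq, elem_b, len_b, 1);
 *     virtqueue_flush(vq, 2);
 */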
 912
 913/* Called within rcu_read_lock().  */
 914static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 915{
 916    uint16_t num_heads = vring_avail_idx(vq) - idx;
 917
 918    /* Check it isn't doing very strange things with descriptor numbers. */
 919    if (num_heads > vq->vring.num) {
 920        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
 921                     idx, vq->shadow_avail_idx);
 922        return -EINVAL;
 923    }
 924    /* On success, callers read a descriptor at vq->last_avail_idx.
 925     * Make sure descriptor read does not bypass avail index read. */
 926    if (num_heads) {
 927        smp_rmb();
 928    }
 929
 930    return num_heads;
 931}
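
/*
 * Both the avail index and last_avail_idx are free-running 16-bit counters,
 * so the subtraction above stays correct across wrap-around; a difference
 * larger than the ring size can only mean the guest published a corrupted
 * index.
 */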
 932
 933/* Called within rcu_read_lock().  */
 934static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
 935                               unsigned int *head)
 936{
 937    /* Grab the next descriptor number they're advertising, and increment
 938     * the index we've seen. */
 939    *head = vring_avail_ring(vq, idx % vq->vring.num);
 940
 941    /* If their number is silly, that's a fatal mistake. */
 942    if (*head >= vq->vring.num) {
 943        virtio_error(vq->vdev, "Guest says index %u is available", *head);
 944        return false;
 945    }
 946
 947    return true;
 948}
 949
 950enum {
 951    VIRTQUEUE_READ_DESC_ERROR = -1,
 952    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
 953    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
 954};
 955
 956static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
 957                                          MemoryRegionCache *desc_cache,
 958                                          unsigned int max, unsigned int *next)
 959{
 960    /* If this descriptor says it doesn't chain, we're done. */
 961    if (!(desc->flags & VRING_DESC_F_NEXT)) {
 962        return VIRTQUEUE_READ_DESC_DONE;
 963    }
 964
 965    /* Check they're not leading us off end of descriptors. */
 966    *next = desc->next;
 967    /* Make sure compiler knows to grab that: we don't want it changing! */
 968    smp_wmb();
 969
 970    if (*next >= max) {
 971        virtio_error(vdev, "Desc next is %u", *next);
 972        return VIRTQUEUE_READ_DESC_ERROR;
 973    }
 974
 975    vring_split_desc_read(vdev, desc, desc_cache, *next);
 976    return VIRTQUEUE_READ_DESC_MORE;
 977}
 978
 979/* Called within rcu_read_lock().  */
 980static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
 981                            unsigned int *in_bytes, unsigned int *out_bytes,
 982                            unsigned max_in_bytes, unsigned max_out_bytes,
 983                            VRingMemoryRegionCaches *caches)
 984{
 985    VirtIODevice *vdev = vq->vdev;
 986    unsigned int max, idx;
 987    unsigned int total_bufs, in_total, out_total;
 988    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
 989    int64_t len = 0;
 990    int rc;
 991
 992    idx = vq->last_avail_idx;
 993    total_bufs = in_total = out_total = 0;
 994
 995    max = vq->vring.num;
 996
 997    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
 998        MemoryRegionCache *desc_cache = &caches->desc;
 999        unsigned int num_bufs;
1000        VRingDesc desc;
1001        unsigned int i;
1002
1003        num_bufs = total_bufs;
1004
1005        if (!virtqueue_get_head(vq, idx++, &i)) {
1006            goto err;
1007        }
1008
1009        vring_split_desc_read(vdev, &desc, desc_cache, i);
1010
1011        if (desc.flags & VRING_DESC_F_INDIRECT) {
1012            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1013                virtio_error(vdev, "Invalid size for indirect buffer table");
1014                goto err;
1015            }
1016
1017            /* If we've got too many, that implies a descriptor loop. */
1018            if (num_bufs >= max) {
1019                virtio_error(vdev, "Looped descriptor");
1020                goto err;
1021            }
1022
1023            /* loop over the indirect descriptor table */
1024            len = address_space_cache_init(&indirect_desc_cache,
1025                                           vdev->dma_as,
1026                                           desc.addr, desc.len, false);
1027            desc_cache = &indirect_desc_cache;
1028            if (len < desc.len) {
1029                virtio_error(vdev, "Cannot map indirect buffer");
1030                goto err;
1031            }
1032
1033            max = desc.len / sizeof(VRingDesc);
1034            num_bufs = i = 0;
1035            vring_split_desc_read(vdev, &desc, desc_cache, i);
1036        }
1037
1038        do {
1039            /* If we've got too many, that implies a descriptor loop. */
1040            if (++num_bufs > max) {
1041                virtio_error(vdev, "Looped descriptor");
1042                goto err;
1043            }
1044
1045            if (desc.flags & VRING_DESC_F_WRITE) {
1046                in_total += desc.len;
1047            } else {
1048                out_total += desc.len;
1049            }
1050            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1051                goto done;
1052            }
1053
1054            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1055        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1056
1057        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1058            goto err;
1059        }
1060
1061        if (desc_cache == &indirect_desc_cache) {
1062            address_space_cache_destroy(&indirect_desc_cache);
1063            total_bufs++;
1064        } else {
1065            total_bufs = num_bufs;
1066        }
1067    }
1068
1069    if (rc < 0) {
1070        goto err;
1071    }
1072
1073done:
1074    address_space_cache_destroy(&indirect_desc_cache);
1075    if (in_bytes) {
1076        *in_bytes = in_total;
1077    }
1078    if (out_bytes) {
1079        *out_bytes = out_total;
1080    }
1081    return;
1082
1083err:
1084    in_total = out_total = 0;
1085    goto done;
1086}
1087
1088static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1089                                           VRingPackedDesc *desc,
1090                                           MemoryRegionCache
1091                                           *desc_cache,
1092                                           unsigned int max,
1093                                           unsigned int *next,
1094                                           bool indirect)
1095{
1096    /* If this descriptor says it doesn't chain, we're done. */
1097    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1098        return VIRTQUEUE_READ_DESC_DONE;
1099    }
1100
1101    ++*next;
1102    if (*next == max) {
1103        if (indirect) {
1104            return VIRTQUEUE_READ_DESC_DONE;
1105        } else {
1106            (*next) -= vq->vring.num;
1107        }
1108    }
1109
1110    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1111    return VIRTQUEUE_READ_DESC_MORE;
1112}
1113
1114/* Called within rcu_read_lock().  */
1115static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1116                                             unsigned int *in_bytes,
1117                                             unsigned int *out_bytes,
1118                                             unsigned max_in_bytes,
1119                                             unsigned max_out_bytes,
1120                                             VRingMemoryRegionCaches *caches)
1121{
1122    VirtIODevice *vdev = vq->vdev;
1123    unsigned int max, idx;
1124    unsigned int total_bufs, in_total, out_total;
1125    MemoryRegionCache *desc_cache;
1126    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1127    int64_t len = 0;
1128    VRingPackedDesc desc;
1129    bool wrap_counter;
1130
1131    idx = vq->last_avail_idx;
1132    wrap_counter = vq->last_avail_wrap_counter;
1133    total_bufs = in_total = out_total = 0;
1134
1135    max = vq->vring.num;
1136
1137    for (;;) {
1138        unsigned int num_bufs = total_bufs;
1139        unsigned int i = idx;
1140        int rc;
1141
1142        desc_cache = &caches->desc;
1143        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1144        if (!is_desc_avail(desc.flags, wrap_counter)) {
1145            break;
1146        }
1147
1148        if (desc.flags & VRING_DESC_F_INDIRECT) {
1149            if (desc.len % sizeof(VRingPackedDesc)) {
1150                virtio_error(vdev, "Invalid size for indirect buffer table");
1151                goto err;
1152            }
1153
1154            /* If we've got too many, that implies a descriptor loop. */
1155            if (num_bufs >= max) {
1156                virtio_error(vdev, "Looped descriptor");
1157                goto err;
1158            }
1159
1160            /* loop over the indirect descriptor table */
1161            len = address_space_cache_init(&indirect_desc_cache,
1162                                           vdev->dma_as,
1163                                           desc.addr, desc.len, false);
1164            desc_cache = &indirect_desc_cache;
1165            if (len < desc.len) {
1166                virtio_error(vdev, "Cannot map indirect buffer");
1167                goto err;
1168            }
1169
1170            max = desc.len / sizeof(VRingPackedDesc);
1171            num_bufs = i = 0;
1172            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1173        }
1174
1175        do {
1176            /* If we've got too many, that implies a descriptor loop. */
1177            if (++num_bufs > max) {
1178                virtio_error(vdev, "Looped descriptor");
1179                goto err;
1180            }
1181
1182            if (desc.flags & VRING_DESC_F_WRITE) {
1183                in_total += desc.len;
1184            } else {
1185                out_total += desc.len;
1186            }
1187            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1188                goto done;
1189            }
1190
1191            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1192                                                 &i, desc_cache ==
1193                                                 &indirect_desc_cache);
1194        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1195
1196        if (desc_cache == &indirect_desc_cache) {
1197            address_space_cache_destroy(&indirect_desc_cache);
1198            total_bufs++;
1199            idx++;
1200        } else {
1201            idx += num_bufs - total_bufs;
1202            total_bufs = num_bufs;
1203        }
1204
1205        if (idx >= vq->vring.num) {
1206            idx -= vq->vring.num;
1207            wrap_counter ^= 1;
1208        }
1209    }
1210
1211    /* Record the index and wrap counter for a kick we want */
1212    vq->shadow_avail_idx = idx;
1213    vq->shadow_avail_wrap_counter = wrap_counter;
1214done:
1215    address_space_cache_destroy(&indirect_desc_cache);
1216    if (in_bytes) {
1217        *in_bytes = in_total;
1218    }
1219    if (out_bytes) {
1220        *out_bytes = out_total;
1221    }
1222    return;
1223
1224err:
1225    in_total = out_total = 0;
1226    goto done;
1227}
1228
1229void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1230                               unsigned int *out_bytes,
1231                               unsigned max_in_bytes, unsigned max_out_bytes)
1232{
1233    uint16_t desc_size;
1234    VRingMemoryRegionCaches *caches;
1235
1236    RCU_READ_LOCK_GUARD();
1237
1238    if (unlikely(!vq->vring.desc)) {
1239        goto err;
1240    }
1241
1242    caches = vring_get_region_caches(vq);
1243    if (!caches) {
1244        goto err;
1245    }
1246
1247    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1248                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1249    if (caches->desc.len < vq->vring.num * desc_size) {
1250        virtio_error(vq->vdev, "Cannot map descriptor ring");
1251        goto err;
1252    }
1253
1254    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1255        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1256                                         max_in_bytes, max_out_bytes,
1257                                         caches);
1258    } else {
1259        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1260                                        max_in_bytes, max_out_bytes,
1261                                        caches);
1262    }
1263
1264    return;
1265err:
1266    if (in_bytes) {
1267        *in_bytes = 0;
1268    }
1269    if (out_bytes) {
1270        *out_bytes = 0;
1271    }
1272}
1273
1274int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1275                          unsigned int out_bytes)
1276{
1277    unsigned int in_total, out_total;
1278
1279    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1280    return in_bytes <= in_total && out_bytes <= out_total;
1281}
1282
1283static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1284                               hwaddr *addr, struct iovec *iov,
1285                               unsigned int max_num_sg, bool is_write,
1286                               hwaddr pa, size_t sz)
1287{
1288    bool ok = false;
1289    unsigned num_sg = *p_num_sg;
1290    assert(num_sg <= max_num_sg);
1291
1292    if (!sz) {
1293        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1294        goto out;
1295    }
1296
1297    while (sz) {
1298        hwaddr len = sz;
1299
1300        if (num_sg == max_num_sg) {
1301            virtio_error(vdev, "virtio: too many write descriptors in "
1302                               "indirect table");
1303            goto out;
1304        }
1305
1306        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1307                                              is_write ?
1308                                              DMA_DIRECTION_FROM_DEVICE :
1309                                              DMA_DIRECTION_TO_DEVICE,
1310                                              MEMTXATTRS_UNSPECIFIED);
1311        if (!iov[num_sg].iov_base) {
1312            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1313            goto out;
1314        }
1315
1316        iov[num_sg].iov_len = len;
1317        addr[num_sg] = pa;
1318
1319        sz -= len;
1320        pa += len;
1321        num_sg++;
1322    }
1323    ok = true;
1324
1325out:
1326    *p_num_sg = num_sg;
1327    return ok;
1328}
1329
1330/* Only used by error code paths before we have a VirtQueueElement (therefore
1331 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1332 * yet.
1333 */
1334static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1335                                    struct iovec *iov)
1336{
1337    unsigned int i;
1338
1339    for (i = 0; i < out_num + in_num; i++) {
1340        int is_write = i >= out_num;
1341
1342        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1343        iov++;
1344    }
1345}
1346
1347static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1348                                hwaddr *addr, unsigned int num_sg,
1349                                bool is_write)
1350{
1351    unsigned int i;
1352    hwaddr len;
1353
1354    for (i = 0; i < num_sg; i++) {
1355        len = sg[i].iov_len;
1356        sg[i].iov_base = dma_memory_map(vdev->dma_as,
1357                                        addr[i], &len, is_write ?
1358                                        DMA_DIRECTION_FROM_DEVICE :
1359                                        DMA_DIRECTION_TO_DEVICE,
1360                                        MEMTXATTRS_UNSPECIFIED);
1361        if (!sg[i].iov_base) {
1362            error_report("virtio: error trying to map MMIO memory");
1363            exit(1);
1364        }
1365        if (len != sg[i].iov_len) {
1366            error_report("virtio: unexpected memory split");
1367            exit(1);
1368        }
1369    }
1370}
1371
1372void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1373{
1374    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1375    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1376                                                                        false);
1377}
1378
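/*
 * Allocate a VirtQueueElement together with its four variable-sized arrays
 * (in_addr/out_addr/in_sg/out_sg), carved out of one allocation laid out
 * after the element itself.  @sz is at least sizeof(VirtQueueElement);
 * devices that embed the element as the first field of a larger request
 * structure pass that structure's size so the arrays land after it.
 */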
1379static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1380{
1381    VirtQueueElement *elem;
1382    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1383    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1384    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1385    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1386    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1387    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1388
1389    assert(sz >= sizeof(VirtQueueElement));
1390    elem = g_malloc(out_sg_end);
1391    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1392    elem->out_num = out_num;
1393    elem->in_num = in_num;
1394    elem->in_addr = (void *)elem + in_addr_ofs;
1395    elem->out_addr = (void *)elem + out_addr_ofs;
1396    elem->in_sg = (void *)elem + in_sg_ofs;
1397    elem->out_sg = (void *)elem + out_sg_ofs;
1398    return elem;
1399}
1400
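/*
 * Pop the next available element from a split ring: read the head index
 * from the avail ring, walk the descriptor chain (switching to the indirect
 * table when VRING_DESC_F_INDIRECT is set), map each descriptor into the
 * in_sg/out_sg iovecs and return a freshly allocated VirtQueueElement, or
 * NULL if the queue is empty or an error occurred.
 */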
1401static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1402{
1403    unsigned int i, head, max;
1404    VRingMemoryRegionCaches *caches;
1405    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1406    MemoryRegionCache *desc_cache;
1407    int64_t len;
1408    VirtIODevice *vdev = vq->vdev;
1409    VirtQueueElement *elem = NULL;
1410    unsigned out_num, in_num, elem_entries;
1411    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1412    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1413    VRingDesc desc;
1414    int rc;
1415
1416    RCU_READ_LOCK_GUARD();
1417    if (virtio_queue_empty_rcu(vq)) {
1418        goto done;
1419    }
1420    /* Needed after virtio_queue_empty(), see comment in
1421     * virtqueue_num_heads(). */
1422    smp_rmb();
1423
 1424    /* When we start there are neither input nor output buffers. */
1425    out_num = in_num = elem_entries = 0;
1426
1427    max = vq->vring.num;
1428
1429    if (vq->inuse >= vq->vring.num) {
1430        virtio_error(vdev, "Virtqueue size exceeded");
1431        goto done;
1432    }
1433
1434    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1435        goto done;
1436    }
1437
1438    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1439        vring_set_avail_event(vq, vq->last_avail_idx);
1440    }
1441
1442    i = head;
1443
1444    caches = vring_get_region_caches(vq);
1445    if (!caches) {
1446        virtio_error(vdev, "Region caches not initialized");
1447        goto done;
1448    }
1449
1450    if (caches->desc.len < max * sizeof(VRingDesc)) {
1451        virtio_error(vdev, "Cannot map descriptor ring");
1452        goto done;
1453    }
1454
1455    desc_cache = &caches->desc;
1456    vring_split_desc_read(vdev, &desc, desc_cache, i);
1457    if (desc.flags & VRING_DESC_F_INDIRECT) {
1458        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1459            virtio_error(vdev, "Invalid size for indirect buffer table");
1460            goto done;
1461        }
1462
1463        /* loop over the indirect descriptor table */
1464        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1465                                       desc.addr, desc.len, false);
1466        desc_cache = &indirect_desc_cache;
1467        if (len < desc.len) {
1468            virtio_error(vdev, "Cannot map indirect buffer");
1469            goto done;
1470        }
1471
1472        max = desc.len / sizeof(VRingDesc);
1473        i = 0;
1474        vring_split_desc_read(vdev, &desc, desc_cache, i);
1475    }
1476
1477    /* Collect all the descriptors */
1478    do {
1479        bool map_ok;
1480
1481        if (desc.flags & VRING_DESC_F_WRITE) {
1482            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1483                                        iov + out_num,
1484                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1485                                        desc.addr, desc.len);
1486        } else {
1487            if (in_num) {
1488                virtio_error(vdev, "Incorrect order for descriptors");
1489                goto err_undo_map;
1490            }
1491            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1492                                        VIRTQUEUE_MAX_SIZE, false,
1493                                        desc.addr, desc.len);
1494        }
1495        if (!map_ok) {
1496            goto err_undo_map;
1497        }
1498
1499        /* If we've got too many, that implies a descriptor loop. */
1500        if (++elem_entries > max) {
1501            virtio_error(vdev, "Looped descriptor");
1502            goto err_undo_map;
1503        }
1504
1505        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1506    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1507
1508    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1509        goto err_undo_map;
1510    }
1511
1512    /* Now copy what we have collected and mapped */
1513    elem = virtqueue_alloc_element(sz, out_num, in_num);
1514    elem->index = head;
1515    elem->ndescs = 1;
1516    for (i = 0; i < out_num; i++) {
1517        elem->out_addr[i] = addr[i];
1518        elem->out_sg[i] = iov[i];
1519    }
1520    for (i = 0; i < in_num; i++) {
1521        elem->in_addr[i] = addr[out_num + i];
1522        elem->in_sg[i] = iov[out_num + i];
1523    }
1524
1525    vq->inuse++;
1526
1527    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1528done:
1529    address_space_cache_destroy(&indirect_desc_cache);
1530
1531    return elem;
1532
1533err_undo_map:
1534    virtqueue_undo_map_desc(out_num, in_num, iov);
1535    goto done;
1536}
1537
1538static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1539{
1540    unsigned int i, max;
1541    VRingMemoryRegionCaches *caches;
1542    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1543    MemoryRegionCache *desc_cache;
1544    int64_t len;
1545    VirtIODevice *vdev = vq->vdev;
1546    VirtQueueElement *elem = NULL;
1547    unsigned out_num, in_num, elem_entries;
1548    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1549    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1550    VRingPackedDesc desc;
1551    uint16_t id;
1552    int rc;
1553
1554    RCU_READ_LOCK_GUARD();
1555    if (virtio_queue_packed_empty_rcu(vq)) {
1556        goto done;
1557    }
1558
 1559    /* When we start there are neither input nor output buffers. */
1560    out_num = in_num = elem_entries = 0;
1561
1562    max = vq->vring.num;
1563
1564    if (vq->inuse >= vq->vring.num) {
1565        virtio_error(vdev, "Virtqueue size exceeded");
1566        goto done;
1567    }
1568
1569    i = vq->last_avail_idx;
1570
1571    caches = vring_get_region_caches(vq);
1572    if (!caches) {
1573        virtio_error(vdev, "Region caches not initialized");
1574        goto done;
1575    }
1576
 1577    if (caches->desc.len < max * sizeof(VRingPackedDesc)) {
1578        virtio_error(vdev, "Cannot map descriptor ring");
1579        goto done;
1580    }
1581
1582    desc_cache = &caches->desc;
1583    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1584    id = desc.id;
1585    if (desc.flags & VRING_DESC_F_INDIRECT) {
1586        if (desc.len % sizeof(VRingPackedDesc)) {
1587            virtio_error(vdev, "Invalid size for indirect buffer table");
1588            goto done;
1589        }
1590
1591        /* loop over the indirect descriptor table */
1592        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1593                                       desc.addr, desc.len, false);
1594        desc_cache = &indirect_desc_cache;
1595        if (len < desc.len) {
1596            virtio_error(vdev, "Cannot map indirect buffer");
1597            goto done;
1598        }
1599
1600        max = desc.len / sizeof(VRingPackedDesc);
1601        i = 0;
1602        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1603    }
1604
1605    /* Collect all the descriptors */
1606    do {
1607        bool map_ok;
1608
1609        if (desc.flags & VRING_DESC_F_WRITE) {
1610            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1611                                        iov + out_num,
1612                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1613                                        desc.addr, desc.len);
1614        } else {
1615            if (in_num) {
1616                virtio_error(vdev, "Incorrect order for descriptors");
1617                goto err_undo_map;
1618            }
1619            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1620                                        VIRTQUEUE_MAX_SIZE, false,
1621                                        desc.addr, desc.len);
1622        }
1623        if (!map_ok) {
1624            goto err_undo_map;
1625        }
1626
1627        /* If we've got too many, that implies a descriptor loop. */
1628        if (++elem_entries > max) {
1629            virtio_error(vdev, "Looped descriptor");
1630            goto err_undo_map;
1631        }
1632
1633        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1634                                             desc_cache ==
1635                                             &indirect_desc_cache);
1636    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1637
1638    /* Now copy what we have collected and mapped */
1639    elem = virtqueue_alloc_element(sz, out_num, in_num);
1640    for (i = 0; i < out_num; i++) {
1641        elem->out_addr[i] = addr[i];
1642        elem->out_sg[i] = iov[i];
1643    }
1644    for (i = 0; i < in_num; i++) {
1645        elem->in_addr[i] = addr[out_num + i];
1646        elem->in_sg[i] = iov[out_num + i];
1647    }
1648
1649    elem->index = id;
1650    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1651    vq->last_avail_idx += elem->ndescs;
1652    vq->inuse += elem->ndescs;
1653
1654    if (vq->last_avail_idx >= vq->vring.num) {
1655        vq->last_avail_idx -= vq->vring.num;
1656        vq->last_avail_wrap_counter ^= 1;
1657    }
1658
1659    vq->shadow_avail_idx = vq->last_avail_idx;
1660    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1661
1662    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1663done:
1664    address_space_cache_destroy(&indirect_desc_cache);
1665
1666    return elem;
1667
1668err_undo_map:
1669    virtqueue_undo_map_desc(out_num, in_num, iov);
1670    goto done;
1671}
1672
1673void *virtqueue_pop(VirtQueue *vq, size_t sz)
1674{
1675    if (virtio_device_disabled(vq->vdev)) {
1676        return NULL;
1677    }
1678
1679    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1680        return virtqueue_packed_pop(vq, sz);
1681    } else {
1682        return virtqueue_split_pop(vq, sz);
1683    }
1684}
1685
1686static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1687{
1688    VRingMemoryRegionCaches *caches;
1689    MemoryRegionCache *desc_cache;
1690    unsigned int dropped = 0;
1691    VirtQueueElement elem = {};
1692    VirtIODevice *vdev = vq->vdev;
1693    VRingPackedDesc desc;
1694
1695    RCU_READ_LOCK_GUARD();
1696
1697    caches = vring_get_region_caches(vq);
1698    if (!caches) {
1699        return 0;
1700    }
1701
1702    desc_cache = &caches->desc;
1703
1704    virtio_queue_set_notification(vq, 0);
1705
1706    while (vq->inuse < vq->vring.num) {
1707        unsigned int idx = vq->last_avail_idx;
1708        /*
1709         * Works similarly to virtqueue_pop() but does not map buffers
1710         * and does not allocate any memory.
1711         */
1712        vring_packed_desc_read(vdev, &desc, desc_cache,
1713                               vq->last_avail_idx, true);
1714        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1715            break;
1716        }
1717        elem.index = desc.id;
1718        elem.ndescs = 1;
1719        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1720                                               vq->vring.num, &idx, false)) {
1721            ++elem.ndescs;
1722        }
1723        /*
1724         * Immediately push the element; there is nothing to unmap
1725         * as both in_num and out_num are set to 0.
1726         */
1727        virtqueue_push(vq, &elem, 0);
1728        dropped++;
1729        vq->last_avail_idx += elem.ndescs;
1730        if (vq->last_avail_idx >= vq->vring.num) {
1731            vq->last_avail_idx -= vq->vring.num;
1732            vq->last_avail_wrap_counter ^= 1;
1733        }
1734    }
1735
1736    return dropped;
1737}
1738
1739static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1740{
1741    unsigned int dropped = 0;
1742    VirtQueueElement elem = {};
1743    VirtIODevice *vdev = vq->vdev;
1744    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1745
1746    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1747        /* works similar to virtqueue_pop but does not map buffers
1748        /* Works similarly to virtqueue_pop() but does not map buffers
1749         * and does not allocate any memory */
1750        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1751            break;
1752        }
1753        vq->inuse++;
1754        vq->last_avail_idx++;
1755        if (fEventIdx) {
1756            vring_set_avail_event(vq, vq->last_avail_idx);
1757        }
1758        /* Immediately push the element; there is nothing to unmap
1759         * as both in_num and out_num are set to 0 */
1760        virtqueue_push(vq, &elem, 0);
1761        dropped++;
1762    }
1763
1764    return dropped;
1765}
1766
1767/* virtqueue_drop_all:
1768 * @vq: The #VirtQueue
1769 * Drops all queued buffers and indicates them to the guest
1770 * as if they were done. Useful when buffers cannot be
1771 * processed but must be returned to the guest.
1772 */
1773unsigned int virtqueue_drop_all(VirtQueue *vq)
1774{
1775    struct VirtIODevice *vdev = vq->vdev;
1776
1777    if (virtio_device_disabled(vq->vdev)) {
1778        return 0;
1779    }
1780
1781    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1782        return virtqueue_packed_drop_all(vq);
1783    } else {
1784        return virtqueue_split_drop_all(vq);
1785    }
1786}
1787
1788/* Reading and writing a structure directly to QEMUFile is *awful*, but
1789 * it is what QEMU has always done by mistake.  We can change it sooner
1790 * or later by bumping the version number of the affected vm states.
1791 * In the meanwhile, since the in-memory layout of VirtQueueElement
1792 * has changed, we need to marshal to and from the layout that was
1793 * used before the change.
1794 */
1795typedef struct VirtQueueElementOld {
1796    unsigned int index;
1797    unsigned int out_num;
1798    unsigned int in_num;
1799    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1800    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1801    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1802    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1803} VirtQueueElementOld;
1804
1805void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1806{
1807    VirtQueueElement *elem;
1808    VirtQueueElementOld data;
1809    int i;
1810
1811    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1812
1813    /* TODO: teach all callers that this can fail, and return failure instead
1814     * of asserting here.
1815     * This is just one thing (there are probably more) that must be
1816     * fixed before we can allow NDEBUG compilation.
1817     */
1818    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1819    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1820
1821    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1822    elem->index = data.index;
1823
1824    for (i = 0; i < elem->in_num; i++) {
1825        elem->in_addr[i] = data.in_addr[i];
1826    }
1827
1828    for (i = 0; i < elem->out_num; i++) {
1829        elem->out_addr[i] = data.out_addr[i];
1830    }
1831
1832    for (i = 0; i < elem->in_num; i++) {
1833        /* Base is overwritten by virtqueue_map.  */
1834        elem->in_sg[i].iov_base = 0;
1835        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1836    }
1837
1838    for (i = 0; i < elem->out_num; i++) {
1839        /* Base is overwritten by virtqueue_map.  */
1840        elem->out_sg[i].iov_base = 0;
1841        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1842    }
1843
1844    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1845        qemu_get_be32s(f, &elem->ndescs);
1846    }
1847
1848    virtqueue_map(vdev, elem);
1849    return elem;
1850}
1851
1852void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1853                                VirtQueueElement *elem)
1854{
1855    VirtQueueElementOld data;
1856    int i;
1857
1858    memset(&data, 0, sizeof(data));
1859    data.index = elem->index;
1860    data.in_num = elem->in_num;
1861    data.out_num = elem->out_num;
1862
1863    for (i = 0; i < elem->in_num; i++) {
1864        data.in_addr[i] = elem->in_addr[i];
1865    }
1866
1867    for (i = 0; i < elem->out_num; i++) {
1868        data.out_addr[i] = elem->out_addr[i];
1869    }
1870
1871    for (i = 0; i < elem->in_num; i++) {
1872        /* Base is overwritten by virtqueue_map when loading.  Do not
1873         * save it, as it would leak the QEMU address space layout.  */
1874        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1875    }
1876
1877    for (i = 0; i < elem->out_num; i++) {
1878        /* Do not save iov_base as above.  */
1879        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1880    }
1881
1882    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1883        qemu_put_be32s(f, &elem->ndescs);
1884    }
1885
1886    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1887}
1888
1889/* virtio device */
1890static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1891{
1892    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1893    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1894
1895    if (virtio_device_disabled(vdev)) {
1896        return;
1897    }
1898
1899    if (k->notify) {
1900        k->notify(qbus->parent, vector);
1901    }
1902}
1903
1904void virtio_update_irq(VirtIODevice *vdev)
1905{
1906    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1907}
1908
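/*
 * Feature negotiation sanity check: if the transport offered
 * VIRTIO_F_IOMMU_PLATFORM, the driver must accept it, since the device
 * cannot bypass the platform IOMMU on its own; otherwise validation fails
 * with -EFAULT.  Device classes may add further checks via
 * ->validate_features().
 */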
1909static int virtio_validate_features(VirtIODevice *vdev)
1910{
1911    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1912
1913    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1914        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1915        return -EFAULT;
1916    }
1917
1918    if (k->validate_features) {
1919        return k->validate_features(vdev);
1920    } else {
1921        return 0;
1922    }
1923}
1924
1925int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1926{
1927    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1928    trace_virtio_set_status(vdev, val);
1929
1930    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1931        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1932            val & VIRTIO_CONFIG_S_FEATURES_OK) {
1933            int ret = virtio_validate_features(vdev);
1934
1935            if (ret) {
1936                return ret;
1937            }
1938        }
1939    }
1940
1941    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1942        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1943        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1944    }
1945
1946    if (k->set_status) {
1947        k->set_status(vdev, val);
1948    }
1949    vdev->status = val;
1950
1951    return 0;
1952}
1953
1954static enum virtio_device_endian virtio_default_endian(void)
1955{
1956    if (target_words_bigendian()) {
1957        return VIRTIO_DEVICE_ENDIAN_BIG;
1958    } else {
1959        return VIRTIO_DEVICE_ENDIAN_LITTLE;
1960    }
1961}
1962
1963static enum virtio_device_endian virtio_current_cpu_endian(void)
1964{
1965    if (cpu_virtio_is_big_endian(current_cpu)) {
1966        return VIRTIO_DEVICE_ENDIAN_BIG;
1967    } else {
1968        return VIRTIO_DEVICE_ENDIAN_LITTLE;
1969    }
1970}
1971
1972void virtio_reset(void *opaque)
1973{
1974    VirtIODevice *vdev = opaque;
1975    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1976    int i;
1977
1978    virtio_set_status(vdev, 0);
1979    if (current_cpu) {
1980        /* Guest initiated reset */
1981        vdev->device_endian = virtio_current_cpu_endian();
1982    } else {
1983        /* System reset */
1984        vdev->device_endian = virtio_default_endian();
1985    }
1986
1987    if (k->reset) {
1988        k->reset(vdev);
1989    }
1990
1991    vdev->start_on_kick = false;
1992    vdev->started = false;
1993    vdev->broken = false;
1994    vdev->guest_features = 0;
1995    vdev->queue_sel = 0;
1996    vdev->status = 0;
1997    vdev->disabled = false;
1998    qatomic_set(&vdev->isr, 0);
1999    vdev->config_vector = VIRTIO_NO_VECTOR;
2000    virtio_notify_vector(vdev, vdev->config_vector);
2001
2002    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2003        vdev->vq[i].vring.desc = 0;
2004        vdev->vq[i].vring.avail = 0;
2005        vdev->vq[i].vring.used = 0;
2006        vdev->vq[i].last_avail_idx = 0;
2007        vdev->vq[i].shadow_avail_idx = 0;
2008        vdev->vq[i].used_idx = 0;
2009        vdev->vq[i].last_avail_wrap_counter = true;
2010        vdev->vq[i].shadow_avail_wrap_counter = true;
2011        vdev->vq[i].used_wrap_counter = true;
2012        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2013        vdev->vq[i].signalled_used = 0;
2014        vdev->vq[i].signalled_used_valid = false;
2015        vdev->vq[i].notification = true;
2016        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2017        vdev->vq[i].inuse = 0;
2018        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2019    }
2020}
2021
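/*
 * Config space accessors.  The plain virtio_config_readX/writeX helpers
 * below use the target-endian ld/st helpers (legacy config space), while the
 * virtio_config_modern_ variants further down use little-endian accessors as
 * required by VIRTIO 1.0 and later.  Out-of-range reads return all-ones and
 * out-of-range writes are ignored.
 */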
2022uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2023{
2024    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2025    uint8_t val;
2026
2027    if (addr + sizeof(val) > vdev->config_len) {
2028        return (uint32_t)-1;
2029    }
2030
2031    k->get_config(vdev, vdev->config);
2032
2033    val = ldub_p(vdev->config + addr);
2034    return val;
2035}
2036
2037uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2038{
2039    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2040    uint16_t val;
2041
2042    if (addr + sizeof(val) > vdev->config_len) {
2043        return (uint32_t)-1;
2044    }
2045
2046    k->get_config(vdev, vdev->config);
2047
2048    val = lduw_p(vdev->config + addr);
2049    return val;
2050}
2051
2052uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2053{
2054    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2055    uint32_t val;
2056
2057    if (addr + sizeof(val) > vdev->config_len) {
2058        return (uint32_t)-1;
2059    }
2060
2061    k->get_config(vdev, vdev->config);
2062
2063    val = ldl_p(vdev->config + addr);
2064    return val;
2065}
2066
2067void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2068{
2069    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2070    uint8_t val = data;
2071
2072    if (addr + sizeof(val) > vdev->config_len) {
2073        return;
2074    }
2075
2076    stb_p(vdev->config + addr, val);
2077
2078    if (k->set_config) {
2079        k->set_config(vdev, vdev->config);
2080    }
2081}
2082
2083void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2084{
2085    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2086    uint16_t val = data;
2087
2088    if (addr + sizeof(val) > vdev->config_len) {
2089        return;
2090    }
2091
2092    stw_p(vdev->config + addr, val);
2093
2094    if (k->set_config) {
2095        k->set_config(vdev, vdev->config);
2096    }
2097}
2098
2099void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2100{
2101    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2102    uint32_t val = data;
2103
2104    if (addr + sizeof(val) > vdev->config_len) {
2105        return;
2106    }
2107
2108    stl_p(vdev->config + addr, val);
2109
2110    if (k->set_config) {
2111        k->set_config(vdev, vdev->config);
2112    }
2113}
2114
2115uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2116{
2117    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2118    uint8_t val;
2119
2120    if (addr + sizeof(val) > vdev->config_len) {
2121        return (uint32_t)-1;
2122    }
2123
2124    k->get_config(vdev, vdev->config);
2125
2126    val = ldub_p(vdev->config + addr);
2127    return val;
2128}
2129
2130uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2131{
2132    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2133    uint16_t val;
2134
2135    if (addr + sizeof(val) > vdev->config_len) {
2136        return (uint32_t)-1;
2137    }
2138
2139    k->get_config(vdev, vdev->config);
2140
2141    val = lduw_le_p(vdev->config + addr);
2142    return val;
2143}
2144
2145uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2146{
2147    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2148    uint32_t val;
2149
2150    if (addr + sizeof(val) > vdev->config_len) {
2151        return (uint32_t)-1;
2152    }
2153
2154    k->get_config(vdev, vdev->config);
2155
2156    val = ldl_le_p(vdev->config + addr);
2157    return val;
2158}
2159
2160void virtio_config_modern_writeb(VirtIODevice *vdev,
2161                                 uint32_t addr, uint32_t data)
2162{
2163    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2164    uint8_t val = data;
2165
2166    if (addr + sizeof(val) > vdev->config_len) {
2167        return;
2168    }
2169
2170    stb_p(vdev->config + addr, val);
2171
2172    if (k->set_config) {
2173        k->set_config(vdev, vdev->config);
2174    }
2175}
2176
2177void virtio_config_modern_writew(VirtIODevice *vdev,
2178                                 uint32_t addr, uint32_t data)
2179{
2180    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2181    uint16_t val = data;
2182
2183    if (addr + sizeof(val) > vdev->config_len) {
2184        return;
2185    }
2186
2187    stw_le_p(vdev->config + addr, val);
2188
2189    if (k->set_config) {
2190        k->set_config(vdev, vdev->config);
2191    }
2192}
2193
2194void virtio_config_modern_writel(VirtIODevice *vdev,
2195                                 uint32_t addr, uint32_t data)
2196{
2197    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2198    uint32_t val = data;
2199
2200    if (addr + sizeof(val) > vdev->config_len) {
2201        return;
2202    }
2203
2204    stl_le_p(vdev->config + addr, val);
2205
2206    if (k->set_config) {
2207        k->set_config(vdev, vdev->config);
2208    }
2209}
2210
2211void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2212{
2213    if (!vdev->vq[n].vring.num) {
2214        return;
2215    }
2216    vdev->vq[n].vring.desc = addr;
2217    virtio_queue_update_rings(vdev, n);
2218}
2219
2220hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2221{
2222    return vdev->vq[n].vring.desc;
2223}
2224
2225void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2226                            hwaddr avail, hwaddr used)
2227{
2228    if (!vdev->vq[n].vring.num) {
2229        return;
2230    }
2231    vdev->vq[n].vring.desc = desc;
2232    vdev->vq[n].vring.avail = avail;
2233    vdev->vq[n].vring.used = used;
2234    virtio_init_region_cache(vdev, n);
2235}
2236
2237void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2238{
2239    /* Don't allow guest to flip queue between existent and
2240     * nonexistent states, or to set it to an invalid size.
2241     */
2242    if (!!num != !!vdev->vq[n].vring.num ||
2243        num > VIRTQUEUE_MAX_SIZE ||
2244        num < 0) {
2245        return;
2246    }
2247    vdev->vq[n].vring.num = num;
2248}
2249
2250VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2251{
2252    return QLIST_FIRST(&vdev->vector_queues[vector]);
2253}
2254
2255VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2256{
2257    return QLIST_NEXT(vq, node);
2258}
2259
2260int virtio_queue_get_num(VirtIODevice *vdev, int n)
2261{
2262    return vdev->vq[n].vring.num;
2263}
2264
2265int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2266{
2267    return vdev->vq[n].vring.num_default;
2268}
2269
2270int virtio_get_num_queues(VirtIODevice *vdev)
2271{
2272    int i;
2273
2274    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2275        if (!virtio_queue_get_num(vdev, i)) {
2276            break;
2277        }
2278    }
2279
2280    return i;
2281}
2282
2283void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2284{
2285    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2286    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2287
2288    /* virtio-1 compliant devices cannot change the alignment */
2289    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2290        error_report("tried to modify queue alignment for virtio-1 device");
2291        return;
2292    }
2293    /* Check that the transport told us it was going to do this
2294     * (so a buggy transport will immediately assert rather than
2295     * silently failing to migrate this state)
2296     */
2297    assert(k->has_variable_vring_alignment);
2298
2299    if (align) {
2300        vdev->vq[n].vring.align = align;
2301        virtio_queue_update_rings(vdev, n);
2302    }
2303}
2304
2305static void virtio_queue_notify_vq(VirtQueue *vq)
2306{
2307    if (vq->vring.desc && vq->handle_output) {
2308        VirtIODevice *vdev = vq->vdev;
2309
2310        if (unlikely(vdev->broken)) {
2311            return;
2312        }
2313
2314        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2315        vq->handle_output(vdev, vq);
2316
2317        if (unlikely(vdev->start_on_kick)) {
2318            virtio_set_started(vdev, true);
2319        }
2320    }
2321}
2322
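/*
 * Guest kick entry point used by transports.  When the host notifier
 * (typically an ioeventfd) is enabled, it is signalled so the handler runs
 * in whichever context monitors it (e.g. an IOThread); otherwise the queue's
 * output handler is invoked directly here.
 */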
2323void virtio_queue_notify(VirtIODevice *vdev, int n)
2324{
2325    VirtQueue *vq = &vdev->vq[n];
2326
2327    if (unlikely(!vq->vring.desc || vdev->broken)) {
2328        return;
2329    }
2330
2331    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2332    if (vq->host_notifier_enabled) {
2333        event_notifier_set(&vq->host_notifier);
2334    } else if (vq->handle_output) {
2335        vq->handle_output(vdev, vq);
2336
2337        if (unlikely(vdev->start_on_kick)) {
2338            virtio_set_started(vdev, true);
2339        }
2340    }
2341}
2342
2343uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2344{
2345    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2346        VIRTIO_NO_VECTOR;
2347}
2348
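/*
 * Associate queue n with an MSI-X vector.  When the transport allocates
 * vector_queues, the queue is also kept on a per-vector list so callers can
 * walk all queues sharing a vector via virtio_vector_first_queue() and
 * virtio_vector_next_queue().
 */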
2349void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2350{
2351    VirtQueue *vq = &vdev->vq[n];
2352
2353    if (n < VIRTIO_QUEUE_MAX) {
2354        if (vdev->vector_queues &&
2355            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2356            QLIST_REMOVE(vq, node);
2357        }
2358        vdev->vq[n].vector = vector;
2359        if (vdev->vector_queues &&
2360            vector != VIRTIO_NO_VECTOR) {
2361            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2362        }
2363    }
2364}
2365
2366VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2367                            VirtIOHandleOutput handle_output)
2368{
2369    int i;
2370
2371    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2372        if (vdev->vq[i].vring.num == 0)
2373            break;
2374    }
2375
2376    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2377        abort();
2378
2379    vdev->vq[i].vring.num = queue_size;
2380    vdev->vq[i].vring.num_default = queue_size;
2381    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2382    vdev->vq[i].handle_output = handle_output;
2383    vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2384
2385    return &vdev->vq[i];
2386}
2387
2388void virtio_delete_queue(VirtQueue *vq)
2389{
2390    vq->vring.num = 0;
2391    vq->vring.num_default = 0;
2392    vq->handle_output = NULL;
2393    g_free(vq->used_elems);
2394    vq->used_elems = NULL;
2395    virtio_virtqueue_reset_region_cache(vq);
2396}
2397
2398void virtio_del_queue(VirtIODevice *vdev, int n)
2399{
2400    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2401        abort();
2402    }
2403
2404    virtio_delete_queue(&vdev->vq[n]);
2405}
2406
2407static void virtio_set_isr(VirtIODevice *vdev, int value)
2408{
2409    uint8_t old = qatomic_read(&vdev->isr);
2410
2411    /* Do not write ISR if it does not change, so that its cacheline remains
2412     * shared in the common case where the guest does not read it.
2413     */
2414    if ((old & value) != value) {
2415        qatomic_or(&vdev->isr, value);
2416    }
2417}
2418
2419/* Called within rcu_read_lock(). */
2420static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2421{
2422    uint16_t old, new;
2423    bool v;
2424    /* We need to expose used array entries before checking used event. */
2425    smp_mb();
2426    /* Always notify when the queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY was acknowledged) */
2427    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2428        !vq->inuse && virtio_queue_empty(vq)) {
2429        return true;
2430    }
2431
2432    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2433        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2434    }
2435
2436    v = vq->signalled_used_valid;
2437    vq->signalled_used_valid = true;
2438    old = vq->signalled_used;
2439    new = vq->signalled_used = vq->used_idx;
2440    return !v || vring_need_event(vring_get_used_event(vq), new, old);
2441}
2442
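/*
 * Packed-ring event suppression.  off_wrap encodes the event offset in bits
 * 0-14 and the driver's wrap counter in bit 15.  If that wrap counter does
 * not match ours, the event position refers to the previous pass over the
 * ring, so it is rebased by vring.num before the comparison.  As in the
 * split-ring case, vring_need_event(event, new, old) then checks whether the
 * used index has passed the event index since the last signalled value.
 */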
2443static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2444                                    uint16_t off_wrap, uint16_t new,
2445                                    uint16_t old)
2446{
2447    int off = off_wrap & ~(1 << 15);
2448
2449    if (wrap != off_wrap >> 15) {
2450        off -= vq->vring.num;
2451    }
2452
2453    return vring_need_event(off, new, old);
2454}
2455
2456/* Called within rcu_read_lock(). */
2457static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2458{
2459    VRingPackedDescEvent e;
2460    uint16_t old, new;
2461    bool v;
2462    VRingMemoryRegionCaches *caches;
2463
2464    caches = vring_get_region_caches(vq);
2465    if (!caches) {
2466        return false;
2467    }
2468
2469    vring_packed_event_read(vdev, &caches->avail, &e);
2470
2471    old = vq->signalled_used;
2472    new = vq->signalled_used = vq->used_idx;
2473    v = vq->signalled_used_valid;
2474    vq->signalled_used_valid = true;
2475
2476    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2477        return false;
2478    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2479        return true;
2480    }
2481
2482    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2483                                         e.off_wrap, new, old);
2484}
2485
2486/* Called within rcu_read_lock().  */
2487static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2488{
2489    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2490        return virtio_packed_should_notify(vdev, vq);
2491    } else {
2492        return virtio_split_should_notify(vdev, vq);
2493    }
2494}
2495
2496void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2497{
2498    WITH_RCU_READ_LOCK_GUARD() {
2499        if (!virtio_should_notify(vdev, vq)) {
2500            return;
2501        }
2502    }
2503
2504    trace_virtio_notify_irqfd(vdev, vq);
2505
2506    /*
2507     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2508     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2509     * incorrectly polling this bit during crashdump and hibernation
2510     * in MSI mode, causing a hang if this bit is never updated.
2511     * Recent releases of Windows do not really shut down, but rather
2512     * log out and hibernate to make the next startup faster.  Hence,
2513     * this manifested as a more serious hang during shutdown with
2514     * MSI enabled.
2515     * The next driver release, from 2016, fixed this problem, so working
2516     * around it is not a must, but it's easy to do, so let's do it here.
2517     *
2518     * Note: it's safe to update ISR from any thread as it was switched
2519     * to an atomic operation.
2520     */
2521    virtio_set_isr(vq->vdev, 0x1);
2522    event_notifier_set(&vq->guest_notifier);
2523}
2524
2525static void virtio_irq(VirtQueue *vq)
2526{
2527    virtio_set_isr(vq->vdev, 0x1);
2528    virtio_notify_vector(vq->vdev, vq->vector);
2529}
2530
2531void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2532{
2533    WITH_RCU_READ_LOCK_GUARD() {
2534        if (!virtio_should_notify(vdev, vq)) {
2535            return;
2536        }
2537    }
2538
2539    trace_virtio_notify(vdev, vq);
2540    virtio_irq(vq);
2541}
2542
2543void virtio_notify_config(VirtIODevice *vdev)
2544{
2545    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2546        return;
2547
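    /* Set both ISR bits: bit 0 (queue interrupt) and bit 1 (config change). */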
2548    virtio_set_isr(vdev, 0x3);
2549    vdev->generation++;
2550    virtio_notify_vector(vdev, vdev->config_vector);
2551}
2552
2553static bool virtio_device_endian_needed(void *opaque)
2554{
2555    VirtIODevice *vdev = opaque;
2556
2557    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2558    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2559        return vdev->device_endian != virtio_default_endian();
2560    }
2561    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2562    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2563}
2564
2565static bool virtio_64bit_features_needed(void *opaque)
2566{
2567    VirtIODevice *vdev = opaque;
2568
2569    return (vdev->host_features >> 32) != 0;
2570}
2571
2572static bool virtio_virtqueue_needed(void *opaque)
2573{
2574    VirtIODevice *vdev = opaque;
2575
2576    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2577}
2578
2579static bool virtio_packed_virtqueue_needed(void *opaque)
2580{
2581    VirtIODevice *vdev = opaque;
2582
2583    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2584}
2585
2586static bool virtio_ringsize_needed(void *opaque)
2587{
2588    VirtIODevice *vdev = opaque;
2589    int i;
2590
2591    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2592        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2593            return true;
2594        }
2595    }
2596    return false;
2597}
2598
2599static bool virtio_extra_state_needed(void *opaque)
2600{
2601    VirtIODevice *vdev = opaque;
2602    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2603    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2604
2605    return k->has_extra_state &&
2606        k->has_extra_state(qbus->parent);
2607}
2608
2609static bool virtio_broken_needed(void *opaque)
2610{
2611    VirtIODevice *vdev = opaque;
2612
2613    return vdev->broken;
2614}
2615
2616static bool virtio_started_needed(void *opaque)
2617{
2618    VirtIODevice *vdev = opaque;
2619
2620    return vdev->started;
2621}
2622
2623static bool virtio_disabled_needed(void *opaque)
2624{
2625    VirtIODevice *vdev = opaque;
2626
2627    return vdev->disabled;
2628}
2629
2630static const VMStateDescription vmstate_virtqueue = {
2631    .name = "virtqueue_state",
2632    .version_id = 1,
2633    .minimum_version_id = 1,
2634    .fields = (VMStateField[]) {
2635        VMSTATE_UINT64(vring.avail, struct VirtQueue),
2636        VMSTATE_UINT64(vring.used, struct VirtQueue),
2637        VMSTATE_END_OF_LIST()
2638    }
2639};
2640
2641static const VMStateDescription vmstate_packed_virtqueue = {
2642    .name = "packed_virtqueue_state",
2643    .version_id = 1,
2644    .minimum_version_id = 1,
2645    .fields = (VMStateField[]) {
2646        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2647        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2648        VMSTATE_UINT16(used_idx, struct VirtQueue),
2649        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2650        VMSTATE_UINT32(inuse, struct VirtQueue),
2651        VMSTATE_END_OF_LIST()
2652    }
2653};
2654
2655static const VMStateDescription vmstate_virtio_virtqueues = {
2656    .name = "virtio/virtqueues",
2657    .version_id = 1,
2658    .minimum_version_id = 1,
2659    .needed = &virtio_virtqueue_needed,
2660    .fields = (VMStateField[]) {
2661        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2662                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2663        VMSTATE_END_OF_LIST()
2664    }
2665};
2666
2667static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2668    .name = "virtio/packed_virtqueues",
2669    .version_id = 1,
2670    .minimum_version_id = 1,
2671    .needed = &virtio_packed_virtqueue_needed,
2672    .fields = (VMStateField[]) {
2673        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2674                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2675        VMSTATE_END_OF_LIST()
2676    }
2677};
2678
2679static const VMStateDescription vmstate_ringsize = {
2680    .name = "ringsize_state",
2681    .version_id = 1,
2682    .minimum_version_id = 1,
2683    .fields = (VMStateField[]) {
2684        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2685        VMSTATE_END_OF_LIST()
2686    }
2687};
2688
2689static const VMStateDescription vmstate_virtio_ringsize = {
2690    .name = "virtio/ringsize",
2691    .version_id = 1,
2692    .minimum_version_id = 1,
2693    .needed = &virtio_ringsize_needed,
2694    .fields = (VMStateField[]) {
2695        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2696                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2697        VMSTATE_END_OF_LIST()
2698    }
2699};
2700
2701static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2702                           const VMStateField *field)
2703{
2704    VirtIODevice *vdev = pv;
2705    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2706    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2707
2708    if (!k->load_extra_state) {
2709        return -1;
2710    } else {
2711        return k->load_extra_state(qbus->parent, f);
2712    }
2713}
2714
2715static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2716                           const VMStateField *field, JSONWriter *vmdesc)
2717{
2718    VirtIODevice *vdev = pv;
2719    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2720    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2721
2722    k->save_extra_state(qbus->parent, f);
2723    return 0;
2724}
2725
2726static const VMStateInfo vmstate_info_extra_state = {
2727    .name = "virtqueue_extra_state",
2728    .get = get_extra_state,
2729    .put = put_extra_state,
2730};
2731
2732static const VMStateDescription vmstate_virtio_extra_state = {
2733    .name = "virtio/extra_state",
2734    .version_id = 1,
2735    .minimum_version_id = 1,
2736    .needed = &virtio_extra_state_needed,
2737    .fields = (VMStateField[]) {
2738        {
2739            .name         = "extra_state",
2740            .version_id   = 0,
2741            .field_exists = NULL,
2742            .size         = 0,
2743            .info         = &vmstate_info_extra_state,
2744            .flags        = VMS_SINGLE,
2745            .offset       = 0,
2746        },
2747        VMSTATE_END_OF_LIST()
2748    }
2749};
2750
2751static const VMStateDescription vmstate_virtio_device_endian = {
2752    .name = "virtio/device_endian",
2753    .version_id = 1,
2754    .minimum_version_id = 1,
2755    .needed = &virtio_device_endian_needed,
2756    .fields = (VMStateField[]) {
2757        VMSTATE_UINT8(device_endian, VirtIODevice),
2758        VMSTATE_END_OF_LIST()
2759    }
2760};
2761
2762static const VMStateDescription vmstate_virtio_64bit_features = {
2763    .name = "virtio/64bit_features",
2764    .version_id = 1,
2765    .minimum_version_id = 1,
2766    .needed = &virtio_64bit_features_needed,
2767    .fields = (VMStateField[]) {
2768        VMSTATE_UINT64(guest_features, VirtIODevice),
2769        VMSTATE_END_OF_LIST()
2770    }
2771};
2772
2773static const VMStateDescription vmstate_virtio_broken = {
2774    .name = "virtio/broken",
2775    .version_id = 1,
2776    .minimum_version_id = 1,
2777    .needed = &virtio_broken_needed,
2778    .fields = (VMStateField[]) {
2779        VMSTATE_BOOL(broken, VirtIODevice),
2780        VMSTATE_END_OF_LIST()
2781    }
2782};
2783
2784static const VMStateDescription vmstate_virtio_started = {
2785    .name = "virtio/started",
2786    .version_id = 1,
2787    .minimum_version_id = 1,
2788    .needed = &virtio_started_needed,
2789    .fields = (VMStateField[]) {
2790        VMSTATE_BOOL(started, VirtIODevice),
2791        VMSTATE_END_OF_LIST()
2792    }
2793};
2794
2795static const VMStateDescription vmstate_virtio_disabled = {
2796    .name = "virtio/disabled",
2797    .version_id = 1,
2798    .minimum_version_id = 1,
2799    .needed = &virtio_disabled_needed,
2800    .fields = (VMStateField[]) {
2801        VMSTATE_BOOL(disabled, VirtIODevice),
2802        VMSTATE_END_OF_LIST()
2803    }
2804};
2805
2806static const VMStateDescription vmstate_virtio = {
2807    .name = "virtio",
2808    .version_id = 1,
2809    .minimum_version_id = 1,
2810    .fields = (VMStateField[]) {
2811        VMSTATE_END_OF_LIST()
2812    },
2813    .subsections = (const VMStateDescription*[]) {
2814        &vmstate_virtio_device_endian,
2815        &vmstate_virtio_64bit_features,
2816        &vmstate_virtio_virtqueues,
2817        &vmstate_virtio_ringsize,
2818        &vmstate_virtio_broken,
2819        &vmstate_virtio_extra_state,
2820        &vmstate_virtio_started,
2821        &vmstate_virtio_packed_virtqueues,
2822        &vmstate_virtio_disabled,
2823        NULL
2824    }
2825};
2826
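/*
 * Save the core virtio state.  The long-standing wire format is: transport
 * config, status/ISR/queue_sel, the low 32 feature bits, the config space,
 * the number of active queues, then per queue its size, optional alignment,
 * desc address and last_avail_idx plus transport queue state.  Device-specific
 * state and the vmstate_virtio subsections (including the high feature bits
 * and the VIRTIO-1 ring addresses) follow at the end.
 */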
2827int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2828{
2829    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2830    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2831    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2832    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2833    int i;
2834
2835    if (k->save_config) {
2836        k->save_config(qbus->parent, f);
2837    }
2838
2839    qemu_put_8s(f, &vdev->status);
2840    qemu_put_8s(f, &vdev->isr);
2841    qemu_put_be16s(f, &vdev->queue_sel);
2842    qemu_put_be32s(f, &guest_features_lo);
2843    qemu_put_be32(f, vdev->config_len);
2844    qemu_put_buffer(f, vdev->config, vdev->config_len);
2845
2846    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2847        if (vdev->vq[i].vring.num == 0)
2848            break;
2849    }
2850
2851    qemu_put_be32(f, i);
2852
2853    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2854        if (vdev->vq[i].vring.num == 0)
2855            break;
2856
2857        qemu_put_be32(f, vdev->vq[i].vring.num);
2858        if (k->has_variable_vring_alignment) {
2859            qemu_put_be32(f, vdev->vq[i].vring.align);
2860        }
2861        /*
2862         * Save desc now; the rest of the ring addresses are saved in
2863         * subsections for VIRTIO-1 devices.
2864         */
2865        qemu_put_be64(f, vdev->vq[i].vring.desc);
2866        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2867        if (k->save_queue) {
2868            k->save_queue(qbus->parent, i, f);
2869        }
2870    }
2871
2872    if (vdc->save != NULL) {
2873        vdc->save(vdev, f);
2874    }
2875
2876    if (vdc->vmsd) {
2877        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2878        if (ret) {
2879            return ret;
2880        }
2881    }
2882
2883    /* Subsections */
2884    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2885}
2886
2887/* A wrapper for use as a VMState .put function */
2888static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2889                              const VMStateField *field, JSONWriter *vmdesc)
2890{
2891    return virtio_save(VIRTIO_DEVICE(opaque), f);
2892}
2893
2894/* A wrapper for use as a VMState .get function */
2895static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2896                             const VMStateField *field)
2897{
2898    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2899    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2900
2901    return virtio_load(vdev, f, dc->vmsd->version_id);
2902}
2903
2904const VMStateInfo  virtio_vmstate_info = {
2905    .name = "virtio",
2906    .get = virtio_device_get,
2907    .put = virtio_device_put,
2908};
2909
2910static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2911{
2912    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2913    bool bad = (val & ~(vdev->host_features)) != 0;
2914
2915    val &= vdev->host_features;
2916    if (k->set_features) {
2917        k->set_features(vdev, val);
2918    }
2919    vdev->guest_features = val;
2920    return bad ? -1 : 0;
2921}
2922
2923int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2924{
2925    int ret;
2926    /*
2927     * The driver must not attempt to set features after feature negotiation
2928     * has finished.
2929     */
2930    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2931        return -EINVAL;
2932    }
2933    ret = virtio_set_features_nocheck(vdev, val);
2934    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2935        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2936        int i;
2937        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2938            if (vdev->vq[i].vring.num != 0) {
2939                virtio_init_region_cache(vdev, i);
2940            }
2941        }
2942    }
2943    if (!ret) {
2944        if (!virtio_device_started(vdev, vdev->status) &&
2945            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2946            vdev->start_on_kick = true;
2947        }
2948    }
2949    return ret;
2950}
2951
2952size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
2953                                      uint64_t host_features)
2954{
2955    size_t config_size = 0;
2956    int i;
2957
2958    for (i = 0; feature_sizes[i].flags != 0; i++) {
2959        if (host_features & feature_sizes[i].flags) {
2960            config_size = MAX(feature_sizes[i].end, config_size);
2961        }
2962    }
2963
2964    return config_size;
2965}
2966
2967int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2968{
2969    int i, ret;
2970    int32_t config_len;
2971    uint32_t num;
2972    uint32_t features;
2973    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2974    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2975    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2976
2977    /*
2978     * We poison the endianness to ensure it does not get used before
2979     * subsections have been loaded.
2980     */
2981    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
2982
2983    if (k->load_config) {
2984        ret = k->load_config(qbus->parent, f);
2985        if (ret)
2986            return ret;
2987    }
2988
2989    qemu_get_8s(f, &vdev->status);
2990    qemu_get_8s(f, &vdev->isr);
2991    qemu_get_be16s(f, &vdev->queue_sel);
2992    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
2993        return -1;
2994    }
2995    qemu_get_be32s(f, &features);
2996
2997    /*
2998     * Temporarily set the low guest_features bits - needed by the
2999     * virtio-net load code, which tests for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
3000     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3001     *
3002     * Note: devices should always test host features in future - don't create
3003     * new dependencies like this.
3004     */
3005    vdev->guest_features = features;
3006
3007    config_len = qemu_get_be32(f);
3008
3009    /*
3010     * There are cases where the incoming config can be bigger or smaller
3011     * than what we have; so load what we have space for, and skip
3012     * any excess that's in the stream.
3013     */
3014    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3015
3016    while (config_len > vdev->config_len) {
3017        qemu_get_byte(f);
3018        config_len--;
3019    }
3020
3021    num = qemu_get_be32(f);
3022
3023    if (num > VIRTIO_QUEUE_MAX) {
3024        error_report("Invalid number of virtqueues: 0x%x", num);
3025        return -1;
3026    }
3027
3028    for (i = 0; i < num; i++) {
3029        vdev->vq[i].vring.num = qemu_get_be32(f);
3030        if (k->has_variable_vring_alignment) {
3031            vdev->vq[i].vring.align = qemu_get_be32(f);
3032        }
3033        vdev->vq[i].vring.desc = qemu_get_be64(f);
3034        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3035        vdev->vq[i].signalled_used_valid = false;
3036        vdev->vq[i].notification = true;
3037
3038        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3039            error_report("VQ %d address 0x0 "
3040                         "inconsistent with Host index 0x%x",
3041                         i, vdev->vq[i].last_avail_idx);
3042            return -1;
3043        }
3044        if (k->load_queue) {
3045            ret = k->load_queue(qbus->parent, i, f);
3046            if (ret)
3047                return ret;
3048        }
3049    }
3050
3051    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3052
3053    if (vdc->load != NULL) {
3054        ret = vdc->load(vdev, f, version_id);
3055        if (ret) {
3056            return ret;
3057        }
3058    }
3059
3060    if (vdc->vmsd) {
3061        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3062        if (ret) {
3063            return ret;
3064        }
3065    }
3066
3067    /* Subsections */
3068    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3069    if (ret) {
3070        return ret;
3071    }
3072
3073    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3074        vdev->device_endian = virtio_default_endian();
3075    }
3076
3077    if (virtio_64bit_features_needed(vdev)) {
3078        /*
3079         * Subsection load filled vdev->guest_features.  Run them
3080         * through virtio_set_features to sanity-check them against
3081         * host_features.
3082         */
3083        uint64_t features64 = vdev->guest_features;
3084        if (virtio_set_features_nocheck(vdev, features64) < 0) {
3085            error_report("Features 0x%" PRIx64 " unsupported. "
3086                         "Allowed features: 0x%" PRIx64,
3087                         features64, vdev->host_features);
3088            return -1;
3089        }
3090    } else {
3091        if (virtio_set_features_nocheck(vdev, features) < 0) {
3092            error_report("Features 0x%x unsupported. "
3093                         "Allowed features: 0x%" PRIx64,
3094                         features, vdev->host_features);
3095            return -1;
3096        }
3097    }
3098
3099    if (!virtio_device_started(vdev, vdev->status) &&
3100        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3101        vdev->start_on_kick = true;
3102    }
3103
3104    RCU_READ_LOCK_GUARD();
3105    for (i = 0; i < num; i++) {
3106        if (vdev->vq[i].vring.desc) {
3107            uint16_t nheads;
3108
3109            /*
3110             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3111             * only the region cache needs to be set up.  Legacy devices need
3112             * to calculate used and avail ring addresses based on the desc
3113             * address.
3114             */
3115            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3116                virtio_init_region_cache(vdev, i);
3117            } else {
3118                virtio_queue_update_rings(vdev, i);
3119            }
3120
3121            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3122                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3123                vdev->vq[i].shadow_avail_wrap_counter =
3124                                        vdev->vq[i].last_avail_wrap_counter;
3125                continue;
3126            }
3127
3128            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3129            /* Check it isn't doing strange things with descriptor numbers. */
3130            if (nheads > vdev->vq[i].vring.num) {
3131                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3132                             "inconsistent with Host index 0x%x: delta 0x%x",
3133                             i, vdev->vq[i].vring.num,
3134                             vring_avail_idx(&vdev->vq[i]),
3135                             vdev->vq[i].last_avail_idx, nheads);
3136                vdev->vq[i].used_idx = 0;
3137                vdev->vq[i].shadow_avail_idx = 0;
3138                vdev->vq[i].inuse = 0;
3139                continue;
3140            }
3141            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3142            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3143
3144            /*
3145             * Some devices migrate VirtQueueElements that have been popped
3146             * from the avail ring but not yet returned to the used ring.
3147             * Since max ring size < UINT16_MAX it's safe to use modulo
3148             * UINT16_MAX + 1 subtraction.
3149             */
3150            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3151                                vdev->vq[i].used_idx);
3152            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3153                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3154                             "used_idx 0x%x",
3155                             i, vdev->vq[i].vring.num,
3156                             vdev->vq[i].last_avail_idx,
3157                             vdev->vq[i].used_idx);
3158                return -1;
3159            }
3160        }
3161    }
3162
3163    if (vdc->post_load) {
3164        ret = vdc->post_load(vdev);
3165        if (ret) {
3166            return ret;
3167        }
3168    }
3169
3170    return 0;
3171}
3172
3173void virtio_cleanup(VirtIODevice *vdev)
3174{
3175    qemu_del_vm_change_state_handler(vdev->vmstate);
3176}
3177
3178static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3179{
3180    VirtIODevice *vdev = opaque;
3181    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3182    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3183    bool backend_run = running && virtio_device_started(vdev, vdev->status);
3184    vdev->vm_running = running;
3185
3186    if (backend_run) {
3187        virtio_set_status(vdev, vdev->status);
3188    }
3189
3190    if (k->vmstate_change) {
3191        k->vmstate_change(qbus->parent, backend_run);
3192    }
3193
3194    if (!backend_run) {
3195        virtio_set_status(vdev, vdev->status);
3196    }
3197}
3198
3199void virtio_instance_init_common(Object *proxy_obj, void *data,
3200                                 size_t vdev_size, const char *vdev_name)
3201{
3202    DeviceState *vdev = data;
3203
3204    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3205                                       vdev_size, vdev_name, &error_abort,
3206                                       NULL);
3207    qdev_alias_all_properties(vdev, proxy_obj);
3208}
3209
3210void virtio_init(VirtIODevice *vdev, const char *name,
3211                 uint16_t device_id, size_t config_size)
3212{
3213    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3214    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3215    int i;
3216    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3217
3218    if (nvectors) {
3219        vdev->vector_queues =
3220            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3221    }
3222
3223    vdev->start_on_kick = false;
3224    vdev->started = false;
3225    vdev->device_id = device_id;
3226    vdev->status = 0;
3227    qatomic_set(&vdev->isr, 0);
3228    vdev->queue_sel = 0;
3229    vdev->config_vector = VIRTIO_NO_VECTOR;
3230    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3231    vdev->vm_running = runstate_is_running();
3232    vdev->broken = false;
3233    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3234        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3235        vdev->vq[i].vdev = vdev;
3236        vdev->vq[i].queue_index = i;
3237        vdev->vq[i].host_notifier_enabled = false;
3238    }
3239
3240    vdev->name = name;
3241    vdev->config_len = config_size;
3242    if (vdev->config_len) {
3243        vdev->config = g_malloc0(config_size);
3244    } else {
3245        vdev->config = NULL;
3246    }
3247    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3248            virtio_vmstate_change, vdev);
3249    vdev->device_endian = virtio_default_endian();
3250    vdev->use_guest_notifier_mask = true;
3251}
3252
3253/*
3254 * Only devices that have already been around prior to defining the virtio
3255 * standard support legacy mode; this includes devices not specified in the
3256 * standard. All newer devices conform to the virtio standard only.
3257 */
3258bool virtio_legacy_allowed(VirtIODevice *vdev)
3259{
3260    switch (vdev->device_id) {
3261    case VIRTIO_ID_NET:
3262    case VIRTIO_ID_BLOCK:
3263    case VIRTIO_ID_CONSOLE:
3264    case VIRTIO_ID_RNG:
3265    case VIRTIO_ID_BALLOON:
3266    case VIRTIO_ID_RPMSG:
3267    case VIRTIO_ID_SCSI:
3268    case VIRTIO_ID_9P:
3269    case VIRTIO_ID_RPROC_SERIAL:
3270    case VIRTIO_ID_CAIF:
3271        return true;
3272    default:
3273        return false;
3274    }
3275}
3276
3277bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3278{
3279    return vdev->disable_legacy_check;
3280}
3281
3282hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3283{
3284    return vdev->vq[n].vring.desc;
3285}
3286
3287bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3288{
3289    return virtio_queue_get_desc_addr(vdev, n) != 0;
3290}
3291
3292bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3293{
3294    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3295    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3296
3297    if (k->queue_enabled) {
3298        return k->queue_enabled(qbus->parent, n);
3299    }
3300    return virtio_queue_enabled_legacy(vdev, n);
3301}
3302
3303hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3304{
3305    return vdev->vq[n].vring.avail;
3306}
3307
3308hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3309{
3310    return vdev->vq[n].vring.used;
3311}
3312
3313hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3314{
3315    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3316}
3317
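/*
 * Split-ring area sizes, as laid out in the virtio spec:
 *   avail: flags (2) + idx (2) + num * 2-byte ring entry [+ used_event (2)]
 *   used:  flags (2) + idx (2) + num * 8-byte ring entry [+ avail_event (2)]
 * The trailing event field is only present when VIRTIO_RING_F_EVENT_IDX was
 * negotiated.  For packed queues these areas each hold a single
 * VRingPackedDescEvent instead.
 */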
3318hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3319{
3320    int s;
3321
3322    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3323        return sizeof(struct VRingPackedDescEvent);
3324    }
3325
3326    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3327    return offsetof(VRingAvail, ring) +
3328        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3329}
3330
3331hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3332{
3333    int s;
3334
3335    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3336        return sizeof(struct VRingPackedDescEvent);
3337    }
3338
3339    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3340    return offsetof(VRingUsed, ring) +
3341        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3342}
3343
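/*
 * For packed queues the "last avail" value exchanged with transports is a
 * 32-bit encoding: bits 0-14 hold last_avail_idx, bit 15 its wrap counter,
 * bits 16-30 hold used_idx and bit 31 the used wrap counter.  The setter
 * below decodes the same layout.
 */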
3344static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3345                                                           int n)
3346{
3347    unsigned int avail, used;
3348
3349    avail = vdev->vq[n].last_avail_idx;
3350    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3351
3352    used = vdev->vq[n].used_idx;
3353    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3354
3355    return avail | used << 16;
3356}
3357
3358static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3359                                                      int n)
3360{
3361    return vdev->vq[n].last_avail_idx;
3362}
3363
3364unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3365{
3366    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3367        return virtio_queue_packed_get_last_avail_idx(vdev, n);
3368    } else {
3369        return virtio_queue_split_get_last_avail_idx(vdev, n);
3370    }
3371}
3372
3373static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3374                                                   int n, unsigned int idx)
3375{
3376    struct VirtQueue *vq = &vdev->vq[n];
3377
3378    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3379    vq->last_avail_wrap_counter =
3380        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3381    idx >>= 16;
3382    vq->used_idx = idx & 0x7fff;
3383    vq->used_wrap_counter = !!(idx & 0x8000);
3384}
3385
3386static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3387                                                  int n, unsigned int idx)
3388{
3389    vdev->vq[n].last_avail_idx = idx;
3390    vdev->vq[n].shadow_avail_idx = idx;
3391}
3392
3393void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3394                                     unsigned int idx)
3395{
3396    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3397        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3398    } else {
3399        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3400    }
3401}
3402
3403static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3404                                                       int n)
3405{
3406    /* We don't have a reference like avail idx in shared memory */
3407    return;
3408}
3409
3410static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3411                                                      int n)
3412{
3413    RCU_READ_LOCK_GUARD();
3414    if (vdev->vq[n].vring.desc) {
3415        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3416        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3417    }
3418}
3419
3420void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3421{
3422    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3423        virtio_queue_packed_restore_last_avail_idx(vdev, n);
3424    } else {
3425        virtio_queue_split_restore_last_avail_idx(vdev, n);
3426    }
3427}
3428
3429static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3430{
3431    /* used idx was updated through set_last_avail_idx() */
3432    return;
3433}
3434
3435static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3436{
3437    RCU_READ_LOCK_GUARD();
3438    if (vdev->vq[n].vring.desc) {
3439        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3440    }
3441}
3442
3443void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3444{
3445    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3446        virtio_queue_packed_update_used_idx(vdev, n);
3447    } else {
3448        virtio_queue_split_update_used_idx(vdev, n);
3449    }
3450}
3451
3452void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3453{
3454    vdev->vq[n].signalled_used_valid = false;
3455}
3456
3457VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3458{
3459    return vdev->vq + n;
3460}
3461
3462uint16_t virtio_get_queue_index(VirtQueue *vq)
3463{
3464    return vq->queue_index;
3465}
3466
3467static void virtio_queue_guest_notifier_read(EventNotifier *n)
3468{
3469    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3470    if (event_notifier_test_and_clear(n)) {
3471        virtio_irq(vq);
3472    }
3473}
3474
3475void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3476                                                bool with_irqfd)
3477{
3478    if (assign && !with_irqfd) {
3479        event_notifier_set_handler(&vq->guest_notifier,
3480                                   virtio_queue_guest_notifier_read);
3481    } else {
3482        event_notifier_set_handler(&vq->guest_notifier, NULL);
3483    }
3484    if (!assign) {
3485        /* Test and clear notifier before closing it,
3486         * in case poll callback didn't have time to run. */
3487        virtio_queue_guest_notifier_read(&vq->guest_notifier);
3488    }
3489}
3490
3491EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3492{
3493    return &vq->guest_notifier;
3494}
3495
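/*
 * Adaptive polling hooks for host notifiers attached to an AioContext:
 * poll_begin suppresses guest->host notifications while the event loop is
 * busy-polling, poll reports whether the ring has work without touching the
 * eventfd, poll_ready processes that work, and poll_end re-enables
 * notifications once polling stops.
 */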
3496static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3497{
3498    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3499
3500    virtio_queue_set_notification(vq, 0);
3501}
3502
3503static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3504{
3505    EventNotifier *n = opaque;
3506    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3507
3508    return vq->vring.desc && !virtio_queue_empty(vq);
3509}
3510
3511static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3512{
3513    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3514
3515    virtio_queue_notify_vq(vq);
3516}
3517
3518static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3519{
3520    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3521
3522    /* Caller polls once more after this to catch requests that race with us */
3523    virtio_queue_set_notification(vq, 1);
3524}
3525
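/*
 * Route the queue's host notifier through @ctx so that guest kicks are
 * handled in that AioContext (e.g. an IOThread), using the polling
 * callbacks above to reduce eventfd overhead under load.
 */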
3526void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3527{
3528    aio_set_event_notifier(ctx, &vq->host_notifier, true,
3529                           virtio_queue_host_notifier_read,
3530                           virtio_queue_host_notifier_aio_poll,
3531                           virtio_queue_host_notifier_aio_poll_ready);
3532    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3533                                virtio_queue_host_notifier_aio_poll_begin,
3534                                virtio_queue_host_notifier_aio_poll_end);
3535}
3536
3537void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3538{
3539    aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
3540    /* Test and clear notifier after disabling event,
3541     * in case poll callback didn't have time to run. */
3542    virtio_queue_host_notifier_read(&vq->host_notifier);
3543}
3544
3545void virtio_queue_host_notifier_read(EventNotifier *n)
3546{
3547    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3548    if (event_notifier_test_and_clear(n)) {
3549        virtio_queue_notify_vq(vq);
3550    }
3551}
3552
3553EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3554{
3555    return &vq->host_notifier;
3556}
3557
3558void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3559{
3560    vq->host_notifier_enabled = enabled;
3561}
3562
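/*
 * Ask the transport to map (or unmap) @mr over the queue's notification
 * area, typically so that guest doorbell writes can reach a backend
 * directly.  Returns -1 if the transport does not implement
 * set_host_notifier_mr.
 */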
3563int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3564                                      MemoryRegion *mr, bool assign)
3565{
3566    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3567    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3568
3569    if (k->set_host_notifier_mr) {
3570        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3571    }
3572
3573    return -1;
3574}
3575
3576void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3577{
3578    g_free(vdev->bus_name);
3579    vdev->bus_name = g_strdup(bus_name);
3580}
3581
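/*
 * Report a fatal device error.  The device is marked broken; if
 * VIRTIO_F_VERSION_1 has been negotiated, VIRTIO_CONFIG_S_NEEDS_RESET is
 * also set and a configuration interrupt is raised so the driver notices.
 */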
3582void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3583{
3584    va_list ap;
3585
3586    va_start(ap, fmt);
3587    error_vreport(fmt, ap);
3588    va_end(ap);
3589
3590    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3591        vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
3592        virtio_notify_config(vdev);
3593    }
3594
3595    vdev->broken = true;
3596}
3597
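/*
 * Memory listener commit hook: the guest memory map may have changed, so
 * rebuild the vring MemoryRegionCaches for every configured queue.
 */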
3598static void virtio_memory_listener_commit(MemoryListener *listener)
3599{
3600    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3601    int i;
3602
3603    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3604        if (vdev->vq[i].vring.num == 0) {
3605            break;
3606        }
3607        virtio_init_region_cache(vdev, i);
3608    }
3609}
3610
3611static void virtio_device_realize(DeviceState *dev, Error **errp)
3612{
3613    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3614    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3615    Error *err = NULL;
3616
3617    /* Devices should either use vmsd or the load/save methods */
3618    assert(!vdc->vmsd || !vdc->load);
3619
3620    if (vdc->realize != NULL) {
3621        vdc->realize(dev, &err);
3622        if (err != NULL) {
3623            error_propagate(errp, err);
3624            return;
3625        }
3626    }
3627
3628    virtio_bus_device_plugged(vdev, &err);
3629    if (err != NULL) {
3630        error_propagate(errp, err);
3631        vdc->unrealize(dev);
3632        return;
3633    }
3634
3635    vdev->listener.commit = virtio_memory_listener_commit;
3636    vdev->listener.name = "virtio";
3637    memory_listener_register(&vdev->listener, vdev->dma_as);
3638}
3639
3640static void virtio_device_unrealize(DeviceState *dev)
3641{
3642    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3643    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3644
3645    memory_listener_unregister(&vdev->listener);
3646    virtio_bus_device_unplugged(vdev);
3647
3648    if (vdc->unrealize != NULL) {
3649        vdc->unrealize(dev);
3650    }
3651
3652    g_free(vdev->bus_name);
3653    vdev->bus_name = NULL;
3654}
3655
3656static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3657{
3658    int i;
3659    if (!vdev->vq) {
3660        return;
3661    }
3662
3663    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3664        if (vdev->vq[i].vring.num == 0) {
3665            break;
3666        }
3667        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3668    }
3669    g_free(vdev->vq);
3670}
3671
3672static void virtio_device_instance_finalize(Object *obj)
3673{
3674    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3675
3676    virtio_device_free_virtqueues(vdev);
3677
3678    g_free(vdev->config);
3679    g_free(vdev->vector_queues);
3680}
3681
3682static Property virtio_properties[] = {
3683    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3684    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3685    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3686    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3687                     disable_legacy_check, false),
3688    DEFINE_PROP_END_OF_LIST(),
3689};
3690
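/*
 * Default start_ioeventfd implementation: assign a host notifier (ioeventfd)
 * to every configured queue, then kick each one so that requests queued
 * before ioeventfd was enabled are not left unprocessed.  On failure the
 * notifiers assigned so far are rolled back.
 */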
3691static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3692{
3693    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3694    int i, n, r, err;
3695
3696    /*
3697     * Batch all the host notifiers in a single transaction to avoid
3698     * quadratic time complexity in address_space_update_ioeventfds().
3699     */
3700    memory_region_transaction_begin();
3701    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3702        VirtQueue *vq = &vdev->vq[n];
3703        if (!virtio_queue_get_num(vdev, n)) {
3704            continue;
3705        }
3706        r = virtio_bus_set_host_notifier(qbus, n, true);
3707        if (r < 0) {
3708            err = r;
3709            goto assign_error;
3710        }
3711        event_notifier_set_handler(&vq->host_notifier,
3712                                   virtio_queue_host_notifier_read);
3713    }
3714
3715    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3716        /* Kick right away to begin processing requests already in vring */
3717        VirtQueue *vq = &vdev->vq[n];
3718        if (!vq->vring.num) {
3719            continue;
3720        }
3721        event_notifier_set(&vq->host_notifier);
3722    }
3723    memory_region_transaction_commit();
3724    return 0;
3725
3726assign_error:
3727    i = n; /* save n for a second iteration after transaction is committed. */
3728    while (--n >= 0) {
3729        VirtQueue *vq = &vdev->vq[n];
3730        if (!virtio_queue_get_num(vdev, n)) {
3731            continue;
3732        }
3733
3734        event_notifier_set_handler(&vq->host_notifier, NULL);
3735        r = virtio_bus_set_host_notifier(qbus, n, false);
3736        assert(r >= 0);
3737    }
3738    /*
3739     * The transaction expects the ioeventfds to be open when it
3740     * commits. Do it now, before the cleanup loop.
3741     */
3742    memory_region_transaction_commit();
3743
3744    while (--i >= 0) {
3745        if (!virtio_queue_get_num(vdev, i)) {
3746            continue;
3747        }
3748        virtio_bus_cleanup_host_notifier(qbus, i);
3749    }
3750    return err;
3751}
3752
3753int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3754{
3755    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3756    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3757
3758    return virtio_bus_start_ioeventfd(vbus);
3759}
3760
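/*
 * Default stop_ioeventfd implementation: detach the handler and unassign
 * the host notifier of every configured queue, cleaning the notifiers up
 * once the memory transaction has committed.
 */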
3761static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3762{
3763    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3764    int n, r;
3765
3766    /*
3767     * Batch all the host notifiers in a single transaction to avoid
3768     * quadratic time complexity in address_space_update_ioeventfds().
3769     */
3770    memory_region_transaction_begin();
3771    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3772        VirtQueue *vq = &vdev->vq[n];
3773
3774        if (!virtio_queue_get_num(vdev, n)) {
3775            continue;
3776        }
3777        event_notifier_set_handler(&vq->host_notifier, NULL);
3778        r = virtio_bus_set_host_notifier(qbus, n, false);
3779        assert(r >= 0);
3780    }
3781    /*
3782     * The transaction expects the ioeventfds to be open when it
3783     * commits. Do it now, before the cleanup loop.
3784     */
3785    memory_region_transaction_commit();
3786
3787    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3788        if (!virtio_queue_get_num(vdev, n)) {
3789            continue;
3790        }
3791        virtio_bus_cleanup_host_notifier(qbus, n);
3792    }
3793}
3794
3795int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3796{
3797    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3798    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3799
3800    return virtio_bus_grab_ioeventfd(vbus);
3801}
3802
3803void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3804{
3805    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3806    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3807
3808    virtio_bus_release_ioeventfd(vbus);
3809}
3810
3811static void virtio_device_class_init(ObjectClass *klass, void *data)
3812{
3813    /* Set the class-level defaults, including the legacy feature bits, here. */
3814    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3815    DeviceClass *dc = DEVICE_CLASS(klass);
3816
3817    dc->realize = virtio_device_realize;
3818    dc->unrealize = virtio_device_unrealize;
3819    dc->bus_type = TYPE_VIRTIO_BUS;
3820    device_class_set_props(dc, virtio_properties);
3821    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3822    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3823
3824    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3825}
3826
3827bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3828{
3829    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3830    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3831
3832    return virtio_bus_ioeventfd_enabled(vbus);
3833}
3834
3835static const TypeInfo virtio_device_info = {
3836    .name = TYPE_VIRTIO_DEVICE,
3837    .parent = TYPE_DEVICE,
3838    .instance_size = sizeof(VirtIODevice),
3839    .class_init = virtio_device_class_init,
3840    .instance_finalize = virtio_device_instance_finalize,
3841    .abstract = true,
3842    .class_size = sizeof(VirtioDeviceClass),
3843};
3844
3845static void virtio_register_types(void)
3846{
3847    type_register_static(&virtio_device_info);
3848}
3849
3850type_init(virtio_register_types)
3851