qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "cpu.h"
  17#include "trace.h"
  18#include "exec/address-spaces.h"
  19#include "qemu/error-report.h"
  20#include "qemu/log.h"
  21#include "qemu/main-loop.h"
  22#include "qemu/module.h"
  23#include "hw/virtio/virtio.h"
  24#include "migration/qemu-file-types.h"
  25#include "qemu/atomic.h"
  26#include "hw/virtio/virtio-bus.h"
  27#include "hw/qdev-properties.h"
  28#include "hw/virtio/virtio-access.h"
  29#include "sysemu/dma.h"
  30#include "sysemu/runstate.h"
  31#include "standard-headers/linux/virtio_ids.h"
  32
  33/*
  34 * The alignment to use between consumer and producer parts of vring.
  35 * x86 pagesize again. This is the default, used by transports like PCI
  36 * which don't provide a means for the guest to tell the host the alignment.
  37 */
  38#define VIRTIO_PCI_VRING_ALIGN         4096
  39
  40typedef struct VRingDesc
  41{
  42    uint64_t addr;
  43    uint32_t len;
  44    uint16_t flags;
  45    uint16_t next;
  46} VRingDesc;
  47
  48typedef struct VRingPackedDesc {
  49    uint64_t addr;
  50    uint32_t len;
  51    uint16_t id;
  52    uint16_t flags;
  53} VRingPackedDesc;
  54
  55typedef struct VRingAvail
  56{
  57    uint16_t flags;
  58    uint16_t idx;
  59    uint16_t ring[];
  60} VRingAvail;
  61
  62typedef struct VRingUsedElem
  63{
  64    uint32_t id;
  65    uint32_t len;
  66} VRingUsedElem;
  67
  68typedef struct VRingUsed
  69{
  70    uint16_t flags;
  71    uint16_t idx;
  72    VRingUsedElem ring[];
  73} VRingUsed;
  74
  75typedef struct VRingMemoryRegionCaches {
  76    struct rcu_head rcu;
  77    MemoryRegionCache desc;
  78    MemoryRegionCache avail;
  79    MemoryRegionCache used;
  80} VRingMemoryRegionCaches;
  81
  82typedef struct VRing
  83{
  84    unsigned int num;
  85    unsigned int num_default;
  86    unsigned int align;
  87    hwaddr desc;
  88    hwaddr avail;
  89    hwaddr used;
  90    VRingMemoryRegionCaches *caches;
  91} VRing;
  92
  93typedef struct VRingPackedDescEvent {
  94    uint16_t off_wrap;
  95    uint16_t flags;
   96} VRingPackedDescEvent;
  97
  98struct VirtQueue
  99{
 100    VRing vring;
 101    VirtQueueElement *used_elems;
 102
 103    /* Next head to pop */
 104    uint16_t last_avail_idx;
 105    bool last_avail_wrap_counter;
 106
 107    /* Last avail_idx read from VQ. */
 108    uint16_t shadow_avail_idx;
 109    bool shadow_avail_wrap_counter;
 110
 111    uint16_t used_idx;
 112    bool used_wrap_counter;
 113
 114    /* Last used index value we have signalled on */
 115    uint16_t signalled_used;
 116
  117    /* Whether the value in signalled_used is valid */
 118    bool signalled_used_valid;
 119
 120    /* Notification enabled? */
 121    bool notification;
 122
 123    uint16_t queue_index;
 124
 125    unsigned int inuse;
 126
 127    uint16_t vector;
 128    VirtIOHandleOutput handle_output;
 129    VirtIOHandleAIOOutput handle_aio_output;
 130    VirtIODevice *vdev;
 131    EventNotifier guest_notifier;
 132    EventNotifier host_notifier;
 133    bool host_notifier_enabled;
 134    QLIST_ENTRY(VirtQueue) node;
 135};
 136
 137static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
 138{
 139    if (!caches) {
 140        return;
 141    }
 142
 143    address_space_cache_destroy(&caches->desc);
 144    address_space_cache_destroy(&caches->avail);
 145    address_space_cache_destroy(&caches->used);
 146    g_free(caches);
 147}
 148
 149static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
 150{
 151    VRingMemoryRegionCaches *caches;
 152
 153    caches = qatomic_read(&vq->vring.caches);
 154    qatomic_rcu_set(&vq->vring.caches, NULL);
 155    if (caches) {
 156        call_rcu(caches, virtio_free_region_cache, rcu);
 157    }
 158}
 159
 160static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 161{
 162    VirtQueue *vq = &vdev->vq[n];
 163    VRingMemoryRegionCaches *old = vq->vring.caches;
 164    VRingMemoryRegionCaches *new = NULL;
 165    hwaddr addr, size;
 166    int64_t len;
 167    bool packed;
  168
 170    addr = vq->vring.desc;
 171    if (!addr) {
 172        goto out_no_cache;
 173    }
 174    new = g_new0(VRingMemoryRegionCaches, 1);
 175    size = virtio_queue_get_desc_size(vdev, n);
  176    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
 178    len = address_space_cache_init(&new->desc, vdev->dma_as,
 179                                   addr, size, packed);
 180    if (len < size) {
 181        virtio_error(vdev, "Cannot map desc");
 182        goto err_desc;
 183    }
 184
 185    size = virtio_queue_get_used_size(vdev, n);
 186    len = address_space_cache_init(&new->used, vdev->dma_as,
 187                                   vq->vring.used, size, true);
 188    if (len < size) {
 189        virtio_error(vdev, "Cannot map used");
 190        goto err_used;
 191    }
 192
 193    size = virtio_queue_get_avail_size(vdev, n);
 194    len = address_space_cache_init(&new->avail, vdev->dma_as,
 195                                   vq->vring.avail, size, false);
 196    if (len < size) {
 197        virtio_error(vdev, "Cannot map avail");
 198        goto err_avail;
 199    }
 200
 201    qatomic_rcu_set(&vq->vring.caches, new);
 202    if (old) {
 203        call_rcu(old, virtio_free_region_cache, rcu);
 204    }
 205    return;
 206
 207err_avail:
 208    address_space_cache_destroy(&new->avail);
 209err_used:
 210    address_space_cache_destroy(&new->used);
 211err_desc:
 212    address_space_cache_destroy(&new->desc);
 213out_no_cache:
 214    g_free(new);
 215    virtio_virtqueue_reset_region_cache(vq);
 216}
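
/*
 * Editor's note (not in the original source): the caches pointer is
 * published with qatomic_rcu_set() and read under rcu_read_lock() via
 * vring_get_region_caches() below, so a replaced or reset cache is only
 * freed from the call_rcu() callback once every in-flight reader is done.
 */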
 217
 218/* virt queue functions */
 219void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 220{
 221    VRing *vring = &vdev->vq[n].vring;
 222
 223    if (!vring->num || !vring->desc || !vring->align) {
 224        /* not yet setup -> nothing to do */
 225        return;
 226    }
 227    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 228    vring->used = vring_align(vring->avail +
 229                              offsetof(VRingAvail, ring[vring->num]),
 230                              vring->align);
 231    virtio_init_region_cache(vdev, n);
 232}
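
/*
 * Editor's note, an illustrative calculation rather than original text:
 * for a split ring with num = 256 and the default 4096-byte alignment,
 * the arithmetic above gives
 *
 *     avail = desc + 256 * sizeof(VRingDesc)           = desc + 4096
 *     used  = vring_align(avail + 4 + 2 * 256, 4096)   = desc + 8192
 *
 * i.e. the avail ring immediately follows the descriptor table and the
 * used ring starts on the next aligned boundary, the contiguous layout
 * assumed by transports that only hand the host a single base address.
 */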
 233
 234/* Called within rcu_read_lock().  */
 235static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 236                                  MemoryRegionCache *cache, int i)
 237{
 238    address_space_read_cached(cache, i * sizeof(VRingDesc),
 239                              desc, sizeof(VRingDesc));
 240    virtio_tswap64s(vdev, &desc->addr);
 241    virtio_tswap32s(vdev, &desc->len);
 242    virtio_tswap16s(vdev, &desc->flags);
 243    virtio_tswap16s(vdev, &desc->next);
 244}
 245
 246static void vring_packed_event_read(VirtIODevice *vdev,
 247                                    MemoryRegionCache *cache,
 248                                    VRingPackedDescEvent *e)
 249{
 250    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
 251    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 252
 253    address_space_read_cached(cache, off_flags, &e->flags,
 254                              sizeof(e->flags));
 255    /* Make sure flags is seen before off_wrap */
 256    smp_rmb();
 257    address_space_read_cached(cache, off_off, &e->off_wrap,
 258                              sizeof(e->off_wrap));
 259    virtio_tswap16s(vdev, &e->off_wrap);
 260    virtio_tswap16s(vdev, &e->flags);
 261}
 262
 263static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 264                                        MemoryRegionCache *cache,
 265                                        uint16_t off_wrap)
 266{
 267    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 268
 269    virtio_tswap16s(vdev, &off_wrap);
 270    address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
 271    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 272}
 273
 274static void vring_packed_flags_write(VirtIODevice *vdev,
 275                                     MemoryRegionCache *cache, uint16_t flags)
 276{
 277    hwaddr off = offsetof(VRingPackedDescEvent, flags);
 278
 279    virtio_tswap16s(vdev, &flags);
 280    address_space_write_cached(cache, off, &flags, sizeof(flags));
 281    address_space_cache_invalidate(cache, off, sizeof(flags));
 282}
 283
 284/* Called within rcu_read_lock().  */
 285static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 286{
 287    return qatomic_rcu_read(&vq->vring.caches);
 288}
 289
 290/* Called within rcu_read_lock().  */
 291static inline uint16_t vring_avail_flags(VirtQueue *vq)
 292{
 293    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 294    hwaddr pa = offsetof(VRingAvail, flags);
 295
 296    if (!caches) {
 297        return 0;
 298    }
 299
 300    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 301}
 302
 303/* Called within rcu_read_lock().  */
 304static inline uint16_t vring_avail_idx(VirtQueue *vq)
 305{
 306    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 307    hwaddr pa = offsetof(VRingAvail, idx);
 308
 309    if (!caches) {
 310        return 0;
 311    }
 312
 313    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 314    return vq->shadow_avail_idx;
 315}
 316
 317/* Called within rcu_read_lock().  */
 318static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 319{
 320    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 321    hwaddr pa = offsetof(VRingAvail, ring[i]);
 322
 323    if (!caches) {
 324        return 0;
 325    }
 326
 327    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 328}
 329
 330/* Called within rcu_read_lock().  */
 331static inline uint16_t vring_get_used_event(VirtQueue *vq)
 332{
 333    return vring_avail_ring(vq, vq->vring.num);
 334}
 335
 336/* Called within rcu_read_lock().  */
 337static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 338                                    int i)
 339{
 340    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 341    hwaddr pa = offsetof(VRingUsed, ring[i]);
 342
 343    if (!caches) {
 344        return;
 345    }
 346
 347    virtio_tswap32s(vq->vdev, &uelem->id);
 348    virtio_tswap32s(vq->vdev, &uelem->len);
 349    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
 350    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
 351}
 352
 353/* Called within rcu_read_lock().  */
 354static uint16_t vring_used_idx(VirtQueue *vq)
 355{
 356    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 357    hwaddr pa = offsetof(VRingUsed, idx);
 358
 359    if (!caches) {
 360        return 0;
 361    }
 362
 363    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 364}
 365
 366/* Called within rcu_read_lock().  */
 367static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 368{
 369    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 370    hwaddr pa = offsetof(VRingUsed, idx);
 371
 372    if (caches) {
 373        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 374        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 375    }
 376
 377    vq->used_idx = val;
 378}
 379
 380/* Called within rcu_read_lock().  */
 381static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 382{
 383    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 384    VirtIODevice *vdev = vq->vdev;
 385    hwaddr pa = offsetof(VRingUsed, flags);
 386    uint16_t flags;
 387
 388    if (!caches) {
 389        return;
 390    }
 391
 392    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 393    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
 394    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 395}
 396
 397/* Called within rcu_read_lock().  */
 398static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 399{
 400    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 401    VirtIODevice *vdev = vq->vdev;
 402    hwaddr pa = offsetof(VRingUsed, flags);
 403    uint16_t flags;
 404
 405    if (!caches) {
 406        return;
 407    }
 408
 409    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 410    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
 411    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 412}
 413
 414/* Called within rcu_read_lock().  */
 415static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 416{
 417    VRingMemoryRegionCaches *caches;
 418    hwaddr pa;
 419    if (!vq->notification) {
 420        return;
 421    }
 422
 423    caches = vring_get_region_caches(vq);
 424    if (!caches) {
 425        return;
 426    }
 427
 428    pa = offsetof(VRingUsed, ring[vq->vring.num]);
 429    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 430    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 431}
 432
 433static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
 434{
 435    RCU_READ_LOCK_GUARD();
 436
 437    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 438        vring_set_avail_event(vq, vring_avail_idx(vq));
 439    } else if (enable) {
 440        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 441    } else {
 442        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 443    }
 444    if (enable) {
 445        /* Expose avail event/used flags before caller checks the avail idx. */
 446        smp_mb();
 447    }
 448}
 449
 450static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
 451{
 452    uint16_t off_wrap;
 453    VRingPackedDescEvent e;
 454    VRingMemoryRegionCaches *caches;
 455
 456    RCU_READ_LOCK_GUARD();
 457    caches = vring_get_region_caches(vq);
 458    if (!caches) {
 459        return;
 460    }
 461
 462    vring_packed_event_read(vq->vdev, &caches->used, &e);
 463
 464    if (!enable) {
 465        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
 466    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 467        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
 468        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
  469        /* Make sure off_wrap is written before flags */
 470        smp_wmb();
 471        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
 472    } else {
 473        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
 474    }
 475
 476    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
 477    if (enable) {
 478        /* Expose avail event/used flags before caller checks the avail idx. */
 479        smp_mb();
 480    }
 481}
 482
 483bool virtio_queue_get_notification(VirtQueue *vq)
 484{
 485    return vq->notification;
 486}
 487
 488void virtio_queue_set_notification(VirtQueue *vq, int enable)
 489{
 490    vq->notification = enable;
 491
 492    if (!vq->vring.desc) {
 493        return;
 494    }
 495
 496    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 497        virtio_queue_packed_set_notification(vq, enable);
 498    } else {
 499        virtio_queue_split_set_notification(vq, enable);
 500    }
 501}
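
/*
 * Illustrative sketch (not part of the original file; the handler name is
 * hypothetical): device output handlers typically disable guest
 * notifications while draining the ring, then re-enable them and re-check
 * to close the race with a concurrently kicking guest:
 *
 *     static void example_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         VirtQueueElement *elem;
 *
 *         do {
 *             virtio_queue_set_notification(vq, 0);
 *             while ((elem = virtqueue_pop(vq, sizeof(*elem)))) {
 *                 // process the request, then complete it with 0 bytes written
 *                 virtqueue_push(vq, elem, 0);
 *                 g_free(elem);
 *             }
 *             virtio_queue_set_notification(vq, 1);
 *         } while (!virtio_queue_empty(vq));
 *         virtio_notify(vdev, vq);
 *     }
 */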
 502
 503int virtio_queue_ready(VirtQueue *vq)
 504{
 505    return vq->vring.avail != 0;
 506}
 507
 508static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 509                                         uint16_t *flags,
 510                                         MemoryRegionCache *cache,
 511                                         int i)
 512{
 513    address_space_read_cached(cache,
 514                              i * sizeof(VRingPackedDesc) +
 515                              offsetof(VRingPackedDesc, flags),
 516                              flags, sizeof(*flags));
 517    virtio_tswap16s(vdev, flags);
 518}
 519
 520static void vring_packed_desc_read(VirtIODevice *vdev,
 521                                   VRingPackedDesc *desc,
 522                                   MemoryRegionCache *cache,
 523                                   int i, bool strict_order)
 524{
 525    hwaddr off = i * sizeof(VRingPackedDesc);
 526
 527    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
 528
 529    if (strict_order) {
  530        /* Make sure flags is read before the rest of the fields. */
 531        smp_rmb();
 532    }
 533
 534    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
 535                              &desc->addr, sizeof(desc->addr));
 536    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
 537                              &desc->id, sizeof(desc->id));
 538    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
 539                              &desc->len, sizeof(desc->len));
 540    virtio_tswap64s(vdev, &desc->addr);
 541    virtio_tswap16s(vdev, &desc->id);
 542    virtio_tswap32s(vdev, &desc->len);
 543}
 544
 545static void vring_packed_desc_write_data(VirtIODevice *vdev,
 546                                         VRingPackedDesc *desc,
 547                                         MemoryRegionCache *cache,
 548                                         int i)
 549{
 550    hwaddr off_id = i * sizeof(VRingPackedDesc) +
 551                    offsetof(VRingPackedDesc, id);
 552    hwaddr off_len = i * sizeof(VRingPackedDesc) +
 553                    offsetof(VRingPackedDesc, len);
 554
 555    virtio_tswap32s(vdev, &desc->len);
 556    virtio_tswap16s(vdev, &desc->id);
 557    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
 558    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
 559    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
 560    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
 561}
 562
 563static void vring_packed_desc_write_flags(VirtIODevice *vdev,
 564                                          VRingPackedDesc *desc,
 565                                          MemoryRegionCache *cache,
 566                                          int i)
 567{
 568    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 569
 570    virtio_tswap16s(vdev, &desc->flags);
 571    address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
 572    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
 573}
 574
 575static void vring_packed_desc_write(VirtIODevice *vdev,
 576                                    VRingPackedDesc *desc,
 577                                    MemoryRegionCache *cache,
 578                                    int i, bool strict_order)
 579{
 580    vring_packed_desc_write_data(vdev, desc, cache, i);
 581    if (strict_order) {
  582        /* Make sure data is written before flags. */
 583        smp_wmb();
 584    }
 585    vring_packed_desc_write_flags(vdev, desc, cache, i);
 586}
 587
 588static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
 589{
 590    bool avail, used;
 591
 592    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
 593    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
 594    return (avail != used) && (avail == wrap_counter);
 595}
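
/*
 * Editor's note restating the check above: with wrap_counter == true a
 * descriptor is available when its AVAIL bit is set and its USED bit is
 * clear; once the device marks it used the two bits match again and the
 * test fails until the driver wraps and flips its wrap counter.
 */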
 596
  597/* Fetch avail_idx from VQ memory only when we really need to know
  598 * whether the guest has added some buffers.
  599 * Called within rcu_read_lock().  */
 600static int virtio_queue_empty_rcu(VirtQueue *vq)
 601{
 602    if (virtio_device_disabled(vq->vdev)) {
 603        return 1;
 604    }
 605
 606    if (unlikely(!vq->vring.avail)) {
 607        return 1;
 608    }
 609
 610    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 611        return 0;
 612    }
 613
 614    return vring_avail_idx(vq) == vq->last_avail_idx;
 615}
 616
 617static int virtio_queue_split_empty(VirtQueue *vq)
 618{
 619    bool empty;
 620
 621    if (virtio_device_disabled(vq->vdev)) {
 622        return 1;
 623    }
 624
 625    if (unlikely(!vq->vring.avail)) {
 626        return 1;
 627    }
 628
 629    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 630        return 0;
 631    }
 632
 633    RCU_READ_LOCK_GUARD();
 634    empty = vring_avail_idx(vq) == vq->last_avail_idx;
 635    return empty;
 636}
 637
 638static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
 639{
 640    struct VRingPackedDesc desc;
 641    VRingMemoryRegionCaches *cache;
 642
 643    if (unlikely(!vq->vring.desc)) {
 644        return 1;
 645    }
 646
 647    cache = vring_get_region_caches(vq);
 648    if (!cache) {
 649        return 1;
 650    }
 651
 652    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
 653                                 vq->last_avail_idx);
 654
 655    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
 656}
 657
 658static int virtio_queue_packed_empty(VirtQueue *vq)
 659{
 660    RCU_READ_LOCK_GUARD();
 661    return virtio_queue_packed_empty_rcu(vq);
 662}
 663
 664int virtio_queue_empty(VirtQueue *vq)
 665{
 666    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 667        return virtio_queue_packed_empty(vq);
 668    } else {
 669        return virtio_queue_split_empty(vq);
 670    }
 671}
 672
 673static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
 674                               unsigned int len)
 675{
 676    AddressSpace *dma_as = vq->vdev->dma_as;
 677    unsigned int offset;
 678    int i;
 679
 680    offset = 0;
 681    for (i = 0; i < elem->in_num; i++) {
 682        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 683
 684        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
 685                         elem->in_sg[i].iov_len,
 686                         DMA_DIRECTION_FROM_DEVICE, size);
 687
 688        offset += size;
 689    }
 690
 691    for (i = 0; i < elem->out_num; i++)
 692        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
 693                         elem->out_sg[i].iov_len,
 694                         DMA_DIRECTION_TO_DEVICE,
 695                         elem->out_sg[i].iov_len);
 696}
 697
 698/* virtqueue_detach_element:
 699 * @vq: The #VirtQueue
 700 * @elem: The #VirtQueueElement
 701 * @len: number of bytes written
 702 *
 703 * Detach the element from the virtqueue.  This function is suitable for device
 704 * reset or other situations where a #VirtQueueElement is simply freed and will
 705 * not be pushed or discarded.
 706 */
 707void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
 708                              unsigned int len)
 709{
 710    vq->inuse -= elem->ndescs;
 711    virtqueue_unmap_sg(vq, elem, len);
 712}
 713
 714static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
 715{
 716    vq->last_avail_idx -= num;
 717}
 718
 719static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
 720{
 721    if (vq->last_avail_idx < num) {
 722        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
 723        vq->last_avail_wrap_counter ^= 1;
 724    } else {
 725        vq->last_avail_idx -= num;
 726    }
 727}
 728
 729/* virtqueue_unpop:
 730 * @vq: The #VirtQueue
 731 * @elem: The #VirtQueueElement
 732 * @len: number of bytes written
 733 *
 734 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 735 * call to virtqueue_pop() will refetch the element.
 736 */
 737void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
 738                     unsigned int len)
 739{
 740
 741    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 742        virtqueue_packed_rewind(vq, 1);
 743    } else {
 744        virtqueue_split_rewind(vq, 1);
 745    }
 746
 747    virtqueue_detach_element(vq, elem, len);
 748}
 749
 750/* virtqueue_rewind:
 751 * @vq: The #VirtQueue
 752 * @num: Number of elements to push back
 753 *
 754 * Pretend that elements weren't popped from the virtqueue.  The next
 755 * virtqueue_pop() will refetch the oldest element.
 756 *
 757 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 758 *
 759 * Returns: true on success, false if @num is greater than the number of in use
 760 * elements.
 761 */
 762bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 763{
 764    if (num > vq->inuse) {
 765        return false;
 766    }
 767
 768    vq->inuse -= num;
 769    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 770        virtqueue_packed_rewind(vq, num);
 771    } else {
 772        virtqueue_split_rewind(vq, num);
 773    }
 774    return true;
 775}
 776
 777static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 778                    unsigned int len, unsigned int idx)
 779{
 780    VRingUsedElem uelem;
 781
 782    if (unlikely(!vq->vring.used)) {
 783        return;
 784    }
 785
 786    idx = (idx + vq->used_idx) % vq->vring.num;
 787
 788    uelem.id = elem->index;
 789    uelem.len = len;
 790    vring_used_write(vq, &uelem, idx);
 791}
 792
 793static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
 794                                  unsigned int len, unsigned int idx)
 795{
 796    vq->used_elems[idx].index = elem->index;
 797    vq->used_elems[idx].len = len;
 798    vq->used_elems[idx].ndescs = elem->ndescs;
 799}
 800
 801static void virtqueue_packed_fill_desc(VirtQueue *vq,
 802                                       const VirtQueueElement *elem,
 803                                       unsigned int idx,
 804                                       bool strict_order)
 805{
 806    uint16_t head;
 807    VRingMemoryRegionCaches *caches;
 808    VRingPackedDesc desc = {
 809        .id = elem->index,
 810        .len = elem->len,
 811    };
 812    bool wrap_counter = vq->used_wrap_counter;
 813
 814    if (unlikely(!vq->vring.desc)) {
 815        return;
 816    }
 817
 818    head = vq->used_idx + idx;
 819    if (head >= vq->vring.num) {
 820        head -= vq->vring.num;
 821        wrap_counter ^= 1;
 822    }
 823    if (wrap_counter) {
 824        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
 825        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
 826    } else {
 827        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
 828        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
 829    }
 830
 831    caches = vring_get_region_caches(vq);
 832    if (!caches) {
 833        return;
 834    }
 835
 836    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
 837}
 838
 839/* Called within rcu_read_lock().  */
 840void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 841                    unsigned int len, unsigned int idx)
 842{
 843    trace_virtqueue_fill(vq, elem, len, idx);
 844
 845    virtqueue_unmap_sg(vq, elem, len);
 846
 847    if (virtio_device_disabled(vq->vdev)) {
 848        return;
 849    }
 850
 851    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 852        virtqueue_packed_fill(vq, elem, len, idx);
 853    } else {
 854        virtqueue_split_fill(vq, elem, len, idx);
 855    }
 856}
 857
 858/* Called within rcu_read_lock().  */
 859static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
 860{
 861    uint16_t old, new;
 862
 863    if (unlikely(!vq->vring.used)) {
 864        return;
 865    }
 866
 867    /* Make sure buffer is written before we update index. */
 868    smp_wmb();
 869    trace_virtqueue_flush(vq, count);
 870    old = vq->used_idx;
 871    new = old + count;
 872    vring_used_idx_set(vq, new);
 873    vq->inuse -= count;
 874    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
 875        vq->signalled_used_valid = false;
 876}
 877
 878static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
 879{
 880    unsigned int i, ndescs = 0;
 881
 882    if (unlikely(!vq->vring.desc)) {
 883        return;
 884    }
 885
 886    for (i = 1; i < count; i++) {
 887        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
 888        ndescs += vq->used_elems[i].ndescs;
 889    }
 890    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
 891    ndescs += vq->used_elems[0].ndescs;
 892
 893    vq->inuse -= ndescs;
 894    vq->used_idx += ndescs;
 895    if (vq->used_idx >= vq->vring.num) {
 896        vq->used_idx -= vq->vring.num;
 897        vq->used_wrap_counter ^= 1;
 898    }
 899}
 900
 901void virtqueue_flush(VirtQueue *vq, unsigned int count)
 902{
 903    if (virtio_device_disabled(vq->vdev)) {
 904        vq->inuse -= count;
 905        return;
 906    }
 907
 908    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 909        virtqueue_packed_flush(vq, count);
 910    } else {
 911        virtqueue_split_flush(vq, count);
 912    }
 913}
 914
 915void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 916                    unsigned int len)
 917{
 918    RCU_READ_LOCK_GUARD();
 919    virtqueue_fill(vq, elem, len, 0);
 920    virtqueue_flush(vq, 1);
 921}
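
/*
 * Illustrative sketch (assumption, not original text; the elems/lens
 * arrays are hypothetical): a device completing several requests at once
 * can batch them with virtqueue_fill()/virtqueue_flush() instead of one
 * virtqueue_push() per element:
 *
 *     RCU_READ_LOCK_GUARD();
 *     for (i = 0; i < n; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, n);
 */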
 922
 923/* Called within rcu_read_lock().  */
 924static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 925{
 926    uint16_t num_heads = vring_avail_idx(vq) - idx;
 927
 928    /* Check it isn't doing very strange things with descriptor numbers. */
 929    if (num_heads > vq->vring.num) {
 930        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
 931                     idx, vq->shadow_avail_idx);
 932        return -EINVAL;
 933    }
 934    /* On success, callers read a descriptor at vq->last_avail_idx.
 935     * Make sure descriptor read does not bypass avail index read. */
 936    if (num_heads) {
 937        smp_rmb();
 938    }
 939
 940    return num_heads;
 941}
 942
 943/* Called within rcu_read_lock().  */
 944static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
 945                               unsigned int *head)
 946{
 947    /* Grab the next descriptor number they're advertising, and increment
 948     * the index we've seen. */
 949    *head = vring_avail_ring(vq, idx % vq->vring.num);
 950
 951    /* If their number is silly, that's a fatal mistake. */
 952    if (*head >= vq->vring.num) {
 953        virtio_error(vq->vdev, "Guest says index %u is available", *head);
 954        return false;
 955    }
 956
 957    return true;
 958}
 959
 960enum {
 961    VIRTQUEUE_READ_DESC_ERROR = -1,
 962    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
 963    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
 964};
 965
 966static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
 967                                          MemoryRegionCache *desc_cache,
 968                                          unsigned int max, unsigned int *next)
 969{
 970    /* If this descriptor says it doesn't chain, we're done. */
 971    if (!(desc->flags & VRING_DESC_F_NEXT)) {
 972        return VIRTQUEUE_READ_DESC_DONE;
 973    }
 974
 975    /* Check they're not leading us off end of descriptors. */
 976    *next = desc->next;
 977    /* Make sure compiler knows to grab that: we don't want it changing! */
 978    smp_wmb();
 979
 980    if (*next >= max) {
 981        virtio_error(vdev, "Desc next is %u", *next);
 982        return VIRTQUEUE_READ_DESC_ERROR;
 983    }
 984
 985    vring_split_desc_read(vdev, desc, desc_cache, *next);
 986    return VIRTQUEUE_READ_DESC_MORE;
 987}
 988
 989static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
 990                            unsigned int *in_bytes, unsigned int *out_bytes,
 991                            unsigned max_in_bytes, unsigned max_out_bytes)
 992{
 993    VirtIODevice *vdev = vq->vdev;
 994    unsigned int max, idx;
 995    unsigned int total_bufs, in_total, out_total;
 996    VRingMemoryRegionCaches *caches;
 997    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
 998    int64_t len = 0;
 999    int rc;
1000
1001    RCU_READ_LOCK_GUARD();
1002
1003    idx = vq->last_avail_idx;
1004    total_bufs = in_total = out_total = 0;
1005
1006    max = vq->vring.num;
1007    caches = vring_get_region_caches(vq);
1008    if (!caches) {
1009        goto err;
1010    }
1011
1012    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1013        MemoryRegionCache *desc_cache = &caches->desc;
1014        unsigned int num_bufs;
1015        VRingDesc desc;
1016        unsigned int i;
1017
1018        num_bufs = total_bufs;
1019
1020        if (!virtqueue_get_head(vq, idx++, &i)) {
1021            goto err;
1022        }
1023
1024        vring_split_desc_read(vdev, &desc, desc_cache, i);
1025
1026        if (desc.flags & VRING_DESC_F_INDIRECT) {
1027            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1028                virtio_error(vdev, "Invalid size for indirect buffer table");
1029                goto err;
1030            }
1031
1032            /* If we've got too many, that implies a descriptor loop. */
1033            if (num_bufs >= max) {
1034                virtio_error(vdev, "Looped descriptor");
1035                goto err;
1036            }
1037
1038            /* loop over the indirect descriptor table */
1039            len = address_space_cache_init(&indirect_desc_cache,
1040                                           vdev->dma_as,
1041                                           desc.addr, desc.len, false);
1042            desc_cache = &indirect_desc_cache;
1043            if (len < desc.len) {
1044                virtio_error(vdev, "Cannot map indirect buffer");
1045                goto err;
1046            }
1047
1048            max = desc.len / sizeof(VRingDesc);
1049            num_bufs = i = 0;
1050            vring_split_desc_read(vdev, &desc, desc_cache, i);
1051        }
1052
1053        do {
1054            /* If we've got too many, that implies a descriptor loop. */
1055            if (++num_bufs > max) {
1056                virtio_error(vdev, "Looped descriptor");
1057                goto err;
1058            }
1059
1060            if (desc.flags & VRING_DESC_F_WRITE) {
1061                in_total += desc.len;
1062            } else {
1063                out_total += desc.len;
1064            }
1065            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1066                goto done;
1067            }
1068
1069            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1070        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1071
1072        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1073            goto err;
1074        }
1075
1076        if (desc_cache == &indirect_desc_cache) {
1077            address_space_cache_destroy(&indirect_desc_cache);
1078            total_bufs++;
1079        } else {
1080            total_bufs = num_bufs;
1081        }
1082    }
1083
1084    if (rc < 0) {
1085        goto err;
1086    }
1087
1088done:
1089    address_space_cache_destroy(&indirect_desc_cache);
1090    if (in_bytes) {
1091        *in_bytes = in_total;
1092    }
1093    if (out_bytes) {
1094        *out_bytes = out_total;
1095    }
1096    return;
1097
1098err:
1099    in_total = out_total = 0;
1100    goto done;
1101}
1102
1103static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1104                                           VRingPackedDesc *desc,
 1105                                           MemoryRegionCache *desc_cache,
1107                                           unsigned int max,
1108                                           unsigned int *next,
1109                                           bool indirect)
1110{
1111    /* If this descriptor says it doesn't chain, we're done. */
1112    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1113        return VIRTQUEUE_READ_DESC_DONE;
1114    }
1115
1116    ++*next;
1117    if (*next == max) {
1118        if (indirect) {
1119            return VIRTQUEUE_READ_DESC_DONE;
1120        } else {
1121            (*next) -= vq->vring.num;
1122        }
1123    }
1124
1125    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1126    return VIRTQUEUE_READ_DESC_MORE;
1127}
1128
1129static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1130                                             unsigned int *in_bytes,
1131                                             unsigned int *out_bytes,
1132                                             unsigned max_in_bytes,
1133                                             unsigned max_out_bytes)
1134{
1135    VirtIODevice *vdev = vq->vdev;
1136    unsigned int max, idx;
1137    unsigned int total_bufs, in_total, out_total;
1138    MemoryRegionCache *desc_cache;
1139    VRingMemoryRegionCaches *caches;
1140    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1141    int64_t len = 0;
1142    VRingPackedDesc desc;
1143    bool wrap_counter;
1144
1145    RCU_READ_LOCK_GUARD();
1146    idx = vq->last_avail_idx;
1147    wrap_counter = vq->last_avail_wrap_counter;
1148    total_bufs = in_total = out_total = 0;
1149
1150    max = vq->vring.num;
1151    caches = vring_get_region_caches(vq);
1152    if (!caches) {
1153        goto err;
1154    }
1155
1156    for (;;) {
1157        unsigned int num_bufs = total_bufs;
1158        unsigned int i = idx;
1159        int rc;
1160
1161        desc_cache = &caches->desc;
1162        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1163        if (!is_desc_avail(desc.flags, wrap_counter)) {
1164            break;
1165        }
1166
1167        if (desc.flags & VRING_DESC_F_INDIRECT) {
1168            if (desc.len % sizeof(VRingPackedDesc)) {
1169                virtio_error(vdev, "Invalid size for indirect buffer table");
1170                goto err;
1171            }
1172
1173            /* If we've got too many, that implies a descriptor loop. */
1174            if (num_bufs >= max) {
1175                virtio_error(vdev, "Looped descriptor");
1176                goto err;
1177            }
1178
1179            /* loop over the indirect descriptor table */
1180            len = address_space_cache_init(&indirect_desc_cache,
1181                                           vdev->dma_as,
1182                                           desc.addr, desc.len, false);
1183            desc_cache = &indirect_desc_cache;
1184            if (len < desc.len) {
1185                virtio_error(vdev, "Cannot map indirect buffer");
1186                goto err;
1187            }
1188
1189            max = desc.len / sizeof(VRingPackedDesc);
1190            num_bufs = i = 0;
1191            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1192        }
1193
1194        do {
1195            /* If we've got too many, that implies a descriptor loop. */
1196            if (++num_bufs > max) {
1197                virtio_error(vdev, "Looped descriptor");
1198                goto err;
1199            }
1200
1201            if (desc.flags & VRING_DESC_F_WRITE) {
1202                in_total += desc.len;
1203            } else {
1204                out_total += desc.len;
1205            }
1206            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1207                goto done;
1208            }
1209
1210            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1211                                                 &i, desc_cache ==
1212                                                 &indirect_desc_cache);
1213        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1214
1215        if (desc_cache == &indirect_desc_cache) {
1216            address_space_cache_destroy(&indirect_desc_cache);
1217            total_bufs++;
1218            idx++;
1219        } else {
1220            idx += num_bufs - total_bufs;
1221            total_bufs = num_bufs;
1222        }
1223
1224        if (idx >= vq->vring.num) {
1225            idx -= vq->vring.num;
1226            wrap_counter ^= 1;
1227        }
1228    }
1229
1230    /* Record the index and wrap counter for a kick we want */
1231    vq->shadow_avail_idx = idx;
1232    vq->shadow_avail_wrap_counter = wrap_counter;
1233done:
1234    address_space_cache_destroy(&indirect_desc_cache);
1235    if (in_bytes) {
1236        *in_bytes = in_total;
1237    }
1238    if (out_bytes) {
1239        *out_bytes = out_total;
1240    }
1241    return;
1242
1243err:
1244    in_total = out_total = 0;
1245    goto done;
1246}
1247
1248void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1249                               unsigned int *out_bytes,
1250                               unsigned max_in_bytes, unsigned max_out_bytes)
1251{
1252    uint16_t desc_size;
1253    VRingMemoryRegionCaches *caches;
1254
1255    if (unlikely(!vq->vring.desc)) {
1256        goto err;
1257    }
1258
1259    caches = vring_get_region_caches(vq);
1260    if (!caches) {
1261        goto err;
1262    }
1263
1264    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1265                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1266    if (caches->desc.len < vq->vring.num * desc_size) {
1267        virtio_error(vq->vdev, "Cannot map descriptor ring");
1268        goto err;
1269    }
1270
1271    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1272        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1273                                         max_in_bytes, max_out_bytes);
1274    } else {
1275        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1276                                        max_in_bytes, max_out_bytes);
1277    }
1278
1279    return;
1280err:
1281    if (in_bytes) {
1282        *in_bytes = 0;
1283    }
1284    if (out_bytes) {
1285        *out_bytes = 0;
1286    }
1287}
1288
1289int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1290                          unsigned int out_bytes)
1291{
1292    unsigned int in_total, out_total;
1293
1294    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1295    return in_bytes <= in_total && out_bytes <= out_total;
1296}
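
/*
 * Illustrative sketch (assumption; the struct names are hypothetical): a
 * device that needs a minimum amount of guest buffer space before popping
 * can check up front, where the first size is device-writable ("in") space
 * and the second is device-readable ("out") space:
 *
 *     if (!virtqueue_avail_bytes(vq, sizeof(struct example_rsp),
 *                                sizeof(struct example_req))) {
 *         return;
 *     }
 */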
1297
1298static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1299                               hwaddr *addr, struct iovec *iov,
1300                               unsigned int max_num_sg, bool is_write,
1301                               hwaddr pa, size_t sz)
1302{
1303    bool ok = false;
1304    unsigned num_sg = *p_num_sg;
1305    assert(num_sg <= max_num_sg);
1306
1307    if (!sz) {
1308        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1309        goto out;
1310    }
1311
1312    while (sz) {
1313        hwaddr len = sz;
1314
1315        if (num_sg == max_num_sg) {
1316            virtio_error(vdev, "virtio: too many write descriptors in "
1317                               "indirect table");
1318            goto out;
1319        }
1320
1321        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1322                                              is_write ?
1323                                              DMA_DIRECTION_FROM_DEVICE :
1324                                              DMA_DIRECTION_TO_DEVICE);
1325        if (!iov[num_sg].iov_base) {
1326            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1327            goto out;
1328        }
1329
1330        iov[num_sg].iov_len = len;
1331        addr[num_sg] = pa;
1332
1333        sz -= len;
1334        pa += len;
1335        num_sg++;
1336    }
1337    ok = true;
1338
1339out:
1340    *p_num_sg = num_sg;
1341    return ok;
1342}
1343
1344/* Only used by error code paths before we have a VirtQueueElement (therefore
1345 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1346 * yet.
1347 */
1348static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1349                                    struct iovec *iov)
1350{
1351    unsigned int i;
1352
1353    for (i = 0; i < out_num + in_num; i++) {
1354        int is_write = i >= out_num;
1355
1356        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1357        iov++;
1358    }
1359}
1360
1361static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1362                                hwaddr *addr, unsigned int num_sg,
1363                                bool is_write)
1364{
1365    unsigned int i;
1366    hwaddr len;
1367
1368    for (i = 0; i < num_sg; i++) {
1369        len = sg[i].iov_len;
1370        sg[i].iov_base = dma_memory_map(vdev->dma_as,
1371                                        addr[i], &len, is_write ?
1372                                        DMA_DIRECTION_FROM_DEVICE :
1373                                        DMA_DIRECTION_TO_DEVICE);
1374        if (!sg[i].iov_base) {
1375            error_report("virtio: error trying to map MMIO memory");
1376            exit(1);
1377        }
1378        if (len != sg[i].iov_len) {
1379            error_report("virtio: unexpected memory split");
1380            exit(1);
1381        }
1382    }
1383}
1384
1385void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1386{
1387    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1388    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1389                                                                        false);
1390}
1391
1392static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1393{
1394    VirtQueueElement *elem;
1395    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1396    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1397    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1398    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1399    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1400    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1401
1402    assert(sz >= sizeof(VirtQueueElement));
1403    elem = g_malloc(out_sg_end);
1404    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1405    elem->out_num = out_num;
1406    elem->in_num = in_num;
1407    elem->in_addr = (void *)elem + in_addr_ofs;
1408    elem->out_addr = (void *)elem + out_addr_ofs;
1409    elem->in_sg = (void *)elem + in_sg_ofs;
1410    elem->out_sg = (void *)elem + out_sg_ofs;
1411    return elem;
1412}
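
/*
 * Editor's note: the single allocation above lays the arrays out after the
 * caller's (possibly larger) element structure, roughly
 *
 *     [ element of size sz | in_addr[] | out_addr[] | in_sg[] | out_sg[] ]
 *
 * which is why devices pass their own element size as sz and why the
 * pointers are fixed up here instead of being allocated separately.
 */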
1413
1414static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1415{
1416    unsigned int i, head, max;
1417    VRingMemoryRegionCaches *caches;
1418    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1419    MemoryRegionCache *desc_cache;
1420    int64_t len;
1421    VirtIODevice *vdev = vq->vdev;
1422    VirtQueueElement *elem = NULL;
1423    unsigned out_num, in_num, elem_entries;
1424    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1425    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1426    VRingDesc desc;
1427    int rc;
1428
1429    RCU_READ_LOCK_GUARD();
1430    if (virtio_queue_empty_rcu(vq)) {
1431        goto done;
1432    }
1433    /* Needed after virtio_queue_empty(), see comment in
1434     * virtqueue_num_heads(). */
1435    smp_rmb();
1436
 1437    /* When we start there are neither input nor output buffers. */
1438    out_num = in_num = elem_entries = 0;
1439
1440    max = vq->vring.num;
1441
1442    if (vq->inuse >= vq->vring.num) {
1443        virtio_error(vdev, "Virtqueue size exceeded");
1444        goto done;
1445    }
1446
1447    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1448        goto done;
1449    }
1450
1451    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1452        vring_set_avail_event(vq, vq->last_avail_idx);
1453    }
1454
1455    i = head;
1456
1457    caches = vring_get_region_caches(vq);
1458    if (!caches) {
1459        virtio_error(vdev, "Region caches not initialized");
1460        goto done;
1461    }
1462
1463    if (caches->desc.len < max * sizeof(VRingDesc)) {
1464        virtio_error(vdev, "Cannot map descriptor ring");
1465        goto done;
1466    }
1467
1468    desc_cache = &caches->desc;
1469    vring_split_desc_read(vdev, &desc, desc_cache, i);
1470    if (desc.flags & VRING_DESC_F_INDIRECT) {
1471        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1472            virtio_error(vdev, "Invalid size for indirect buffer table");
1473            goto done;
1474        }
1475
1476        /* loop over the indirect descriptor table */
1477        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1478                                       desc.addr, desc.len, false);
1479        desc_cache = &indirect_desc_cache;
1480        if (len < desc.len) {
1481            virtio_error(vdev, "Cannot map indirect buffer");
1482            goto done;
1483        }
1484
1485        max = desc.len / sizeof(VRingDesc);
1486        i = 0;
1487        vring_split_desc_read(vdev, &desc, desc_cache, i);
1488    }
1489
1490    /* Collect all the descriptors */
1491    do {
1492        bool map_ok;
1493
1494        if (desc.flags & VRING_DESC_F_WRITE) {
1495            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1496                                        iov + out_num,
1497                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1498                                        desc.addr, desc.len);
1499        } else {
1500            if (in_num) {
1501                virtio_error(vdev, "Incorrect order for descriptors");
1502                goto err_undo_map;
1503            }
1504            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1505                                        VIRTQUEUE_MAX_SIZE, false,
1506                                        desc.addr, desc.len);
1507        }
1508        if (!map_ok) {
1509            goto err_undo_map;
1510        }
1511
1512        /* If we've got too many, that implies a descriptor loop. */
1513        if (++elem_entries > max) {
1514            virtio_error(vdev, "Looped descriptor");
1515            goto err_undo_map;
1516        }
1517
1518        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1519    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1520
1521    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1522        goto err_undo_map;
1523    }
1524
1525    /* Now copy what we have collected and mapped */
1526    elem = virtqueue_alloc_element(sz, out_num, in_num);
1527    elem->index = head;
1528    elem->ndescs = 1;
1529    for (i = 0; i < out_num; i++) {
1530        elem->out_addr[i] = addr[i];
1531        elem->out_sg[i] = iov[i];
1532    }
1533    for (i = 0; i < in_num; i++) {
1534        elem->in_addr[i] = addr[out_num + i];
1535        elem->in_sg[i] = iov[out_num + i];
1536    }
1537
1538    vq->inuse++;
1539
1540    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1541done:
1542    address_space_cache_destroy(&indirect_desc_cache);
1543
1544    return elem;
1545
1546err_undo_map:
1547    virtqueue_undo_map_desc(out_num, in_num, iov);
1548    goto done;
1549}
1550
1551static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1552{
1553    unsigned int i, max;
1554    VRingMemoryRegionCaches *caches;
1555    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1556    MemoryRegionCache *desc_cache;
1557    int64_t len;
1558    VirtIODevice *vdev = vq->vdev;
1559    VirtQueueElement *elem = NULL;
1560    unsigned out_num, in_num, elem_entries;
1561    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1562    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1563    VRingPackedDesc desc;
1564    uint16_t id;
1565    int rc;
1566
1567    RCU_READ_LOCK_GUARD();
1568    if (virtio_queue_packed_empty_rcu(vq)) {
1569        goto done;
1570    }
1571
 1572    /* When we start there are neither input nor output buffers. */
1573    out_num = in_num = elem_entries = 0;
1574
1575    max = vq->vring.num;
1576
1577    if (vq->inuse >= vq->vring.num) {
1578        virtio_error(vdev, "Virtqueue size exceeded");
1579        goto done;
1580    }
1581
1582    i = vq->last_avail_idx;
1583
1584    caches = vring_get_region_caches(vq);
1585    if (!caches) {
1586        virtio_error(vdev, "Region caches not initialized");
1587        goto done;
1588    }
1589
1590    if (caches->desc.len < max * sizeof(VRingDesc)) {
1591        virtio_error(vdev, "Cannot map descriptor ring");
1592        goto done;
1593    }
1594
1595    desc_cache = &caches->desc;
1596    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1597    id = desc.id;
1598    if (desc.flags & VRING_DESC_F_INDIRECT) {
1599        if (desc.len % sizeof(VRingPackedDesc)) {
1600            virtio_error(vdev, "Invalid size for indirect buffer table");
1601            goto done;
1602        }
1603
1604        /* loop over the indirect descriptor table */
1605        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1606                                       desc.addr, desc.len, false);
1607        desc_cache = &indirect_desc_cache;
1608        if (len < desc.len) {
1609            virtio_error(vdev, "Cannot map indirect buffer");
1610            goto done;
1611        }
1612
1613        max = desc.len / sizeof(VRingPackedDesc);
1614        i = 0;
1615        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1616    }
1617
1618    /* Collect all the descriptors */
1619    do {
1620        bool map_ok;
1621
1622        if (desc.flags & VRING_DESC_F_WRITE) {
1623            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1624                                        iov + out_num,
1625                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1626                                        desc.addr, desc.len);
1627        } else {
1628            if (in_num) {
1629                virtio_error(vdev, "Incorrect order for descriptors");
1630                goto err_undo_map;
1631            }
1632            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1633                                        VIRTQUEUE_MAX_SIZE, false,
1634                                        desc.addr, desc.len);
1635        }
1636        if (!map_ok) {
1637            goto err_undo_map;
1638        }
1639
1640        /* If we've got too many, that implies a descriptor loop. */
1641        if (++elem_entries > max) {
1642            virtio_error(vdev, "Looped descriptor");
1643            goto err_undo_map;
1644        }
1645
1646        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1647                                             desc_cache ==
1648                                             &indirect_desc_cache);
1649    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1650
1651    /* Now copy what we have collected and mapped */
1652    elem = virtqueue_alloc_element(sz, out_num, in_num);
1653    for (i = 0; i < out_num; i++) {
1654        elem->out_addr[i] = addr[i];
1655        elem->out_sg[i] = iov[i];
1656    }
1657    for (i = 0; i < in_num; i++) {
1658        elem->in_addr[i] = addr[out_num + i];
1659        elem->in_sg[i] = iov[out_num + i];
1660    }
1661
1662    elem->index = id;
1663    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1664    vq->last_avail_idx += elem->ndescs;
1665    vq->inuse += elem->ndescs;
1666
1667    if (vq->last_avail_idx >= vq->vring.num) {
1668        vq->last_avail_idx -= vq->vring.num;
1669        vq->last_avail_wrap_counter ^= 1;
1670    }
1671
1672    vq->shadow_avail_idx = vq->last_avail_idx;
1673    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1674
1675    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1676done:
1677    address_space_cache_destroy(&indirect_desc_cache);
1678
1679    return elem;
1680
1681err_undo_map:
1682    virtqueue_undo_map_desc(out_num, in_num, iov);
1683    goto done;
1684}
1685
1686void *virtqueue_pop(VirtQueue *vq, size_t sz)
1687{
1688    if (virtio_device_disabled(vq->vdev)) {
1689        return NULL;
1690    }
1691
1692    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1693        return virtqueue_packed_pop(vq, sz);
1694    } else {
1695        return virtqueue_split_pop(vq, sz);
1696    }
1697}
1698
1699static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1700{
1701    VRingMemoryRegionCaches *caches;
1702    MemoryRegionCache *desc_cache;
1703    unsigned int dropped = 0;
1704    VirtQueueElement elem = {};
1705    VirtIODevice *vdev = vq->vdev;
1706    VRingPackedDesc desc;
1707
1708    caches = vring_get_region_caches(vq);
1709    if (!caches) {
1710        return 0;
1711    }
1712
1713    desc_cache = &caches->desc;
1714
1715    virtio_queue_set_notification(vq, 0);
1716
1717    while (vq->inuse < vq->vring.num) {
1718        unsigned int idx = vq->last_avail_idx;
1719        /*
1720         * Works similarly to virtqueue_pop() but does not map buffers
1721         * and does not allocate any memory.
1722         */
1723        vring_packed_desc_read(vdev, &desc, desc_cache,
1724                               vq->last_avail_idx, true);
1725        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1726            break;
1727        }
1728        elem.index = desc.id;
1729        elem.ndescs = 1;
1730        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1731                                               vq->vring.num, &idx, false)) {
1732            ++elem.ndescs;
1733        }
1734        /*
1735         * immediately push the element, nothing to unmap
1736         * as both in_num and out_num are set to 0.
1737         */
1738        virtqueue_push(vq, &elem, 0);
1739        dropped++;
1740        vq->last_avail_idx += elem.ndescs;
1741        if (vq->last_avail_idx >= vq->vring.num) {
1742            vq->last_avail_idx -= vq->vring.num;
1743            vq->last_avail_wrap_counter ^= 1;
1744        }
1745    }
1746
1747    return dropped;
1748}
1749
1750static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1751{
1752    unsigned int dropped = 0;
1753    VirtQueueElement elem = {};
1754    VirtIODevice *vdev = vq->vdev;
1755    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1756
1757    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1758        /* Works similarly to virtqueue_pop() but does not map buffers
1759         * and does not allocate any memory. */
1760        smp_rmb();
1761        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1762            break;
1763        }
1764        vq->inuse++;
1765        vq->last_avail_idx++;
1766        if (fEventIdx) {
1767            vring_set_avail_event(vq, vq->last_avail_idx);
1768        }
1769        /* immediately push the element, nothing to unmap
1770         * as both in_num and out_num are set to 0 */
1771        virtqueue_push(vq, &elem, 0);
1772        dropped++;
1773    }
1774
1775    return dropped;
1776}
1777
1778/* virtqueue_drop_all:
1779 * @vq: The #VirtQueue
1780 * Drops all queued buffers and indicates them to the guest
1781 * as if they are done. Useful when buffers can not be
1782 * processed but must be returned to the guest.
1783 */
1784unsigned int virtqueue_drop_all(VirtQueue *vq)
1785{
1786    struct VirtIODevice *vdev = vq->vdev;
1787
1788    if (virtio_device_disabled(vq->vdev)) {
1789        return 0;
1790    }
1791
1792    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1793        return virtqueue_packed_drop_all(vq);
1794    } else {
1795        return virtqueue_split_drop_all(vq);
1796    }
1797}
1798
1799/* Reading and writing a structure directly to QEMUFile is *awful*, but
1800 * it is what QEMU has always done by mistake.  We can change it sooner
1801 * or later by bumping the version number of the affected vm states.
1802 * In the meantime, since the in-memory layout of VirtQueueElement
1803 * has changed, we need to marshal to and from the layout that was
1804 * used before the change.
1805 */
1806typedef struct VirtQueueElementOld {
1807    unsigned int index;
1808    unsigned int out_num;
1809    unsigned int in_num;
1810    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1811    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1812    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1813    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1814} VirtQueueElementOld;
1815
1816void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1817{
1818    VirtQueueElement *elem;
1819    VirtQueueElementOld data;
1820    int i;
1821
1822    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1823
1824    /* TODO: teach all callers that this can fail, and return failure instead
1825     * of asserting here.
1826     * This is just one thing (there are probably more) that must be
1827     * fixed before we can allow NDEBUG compilation.
1828     */
1829    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1830    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1831
1832    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1833    elem->index = data.index;
1834
1835    for (i = 0; i < elem->in_num; i++) {
1836        elem->in_addr[i] = data.in_addr[i];
1837    }
1838
1839    for (i = 0; i < elem->out_num; i++) {
1840        elem->out_addr[i] = data.out_addr[i];
1841    }
1842
1843    for (i = 0; i < elem->in_num; i++) {
1844        /* Base is overwritten by virtqueue_map.  */
1845        elem->in_sg[i].iov_base = 0;
1846        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1847    }
1848
1849    for (i = 0; i < elem->out_num; i++) {
1850        /* Base is overwritten by virtqueue_map.  */
1851        elem->out_sg[i].iov_base = 0;
1852        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1853    }
1854
1855    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1856        qemu_get_be32s(f, &elem->ndescs);
1857    }
1858
1859    virtqueue_map(vdev, elem);
1860    return elem;
1861}
1862
1863void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1864                                VirtQueueElement *elem)
1865{
1866    VirtQueueElementOld data;
1867    int i;
1868
1869    memset(&data, 0, sizeof(data));
1870    data.index = elem->index;
1871    data.in_num = elem->in_num;
1872    data.out_num = elem->out_num;
1873
1874    for (i = 0; i < elem->in_num; i++) {
1875        data.in_addr[i] = elem->in_addr[i];
1876    }
1877
1878    for (i = 0; i < elem->out_num; i++) {
1879        data.out_addr[i] = elem->out_addr[i];
1880    }
1881
1882    for (i = 0; i < elem->in_num; i++) {
1883        /* Base is overwritten by virtqueue_map when loading.  Do not
1884         * save it, as it would leak the QEMU address space layout.  */
1885        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1886    }
1887
1888    for (i = 0; i < elem->out_num; i++) {
1889        /* Do not save iov_base as above.  */
1890        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1891    }
1892
1893    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1894        qemu_put_be32s(f, &elem->ndescs);
1895    }
1896
1897    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1898}
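/*
 * Illustrative sketch (hypothetical helper names, not from this file):
 * a device that migrates in-flight requests typically pairs these two
 * helpers in its save and load callbacks, passing the size of its own
 * request wrapper structure as sz on the load side.
 */
#if 0
static void example_save_request(VirtIODevice *vdev, QEMUFile *f,
                                 VirtQueueElement *elem)
{
    qemu_put_virtqueue_element(vdev, f, elem);
}

static VirtQueueElement *example_load_request(VirtIODevice *vdev, QEMUFile *f)
{
    return qemu_get_virtqueue_element(vdev, f, sizeof(VirtQueueElement));
}
#endif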
1899
1900/* virtio device */
1901static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1902{
1903    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1904    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1905
1906    if (virtio_device_disabled(vdev)) {
1907        return;
1908    }
1909
1910    if (k->notify) {
1911        k->notify(qbus->parent, vector);
1912    }
1913}
1914
1915void virtio_update_irq(VirtIODevice *vdev)
1916{
1917    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1918}
1919
1920static int virtio_validate_features(VirtIODevice *vdev)
1921{
1922    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1923
1924    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1925        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1926        return -EFAULT;
1927    }
1928
1929    if (k->validate_features) {
1930        return k->validate_features(vdev);
1931    } else {
1932        return 0;
1933    }
1934}
1935
1936int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1937{
1938    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1939    trace_virtio_set_status(vdev, val);
1940
1941    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1942        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1943            val & VIRTIO_CONFIG_S_FEATURES_OK) {
1944            int ret = virtio_validate_features(vdev);
1945
1946            if (ret) {
1947                return ret;
1948            }
1949        }
1950    }
1951
1952    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1953        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1954        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1955    }
1956
1957    if (k->set_status) {
1958        k->set_status(vdev, val);
1959    }
1960    vdev->status = val;
1961
1962    return 0;
1963}
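/*
 * For reference (a summary of the code above): a VIRTIO 1.0+ driver
 * normally walks the status through ACKNOWLEDGE -> DRIVER -> FEATURES_OK
 * -> DRIVER_OK; virtio_set_status() runs feature validation exactly when
 * FEATURES_OK is first set, and toggles the started state whenever the
 * DRIVER_OK bit changes.
 */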
1964
1965static enum virtio_device_endian virtio_default_endian(void)
1966{
1967    if (target_words_bigendian()) {
1968        return VIRTIO_DEVICE_ENDIAN_BIG;
1969    } else {
1970        return VIRTIO_DEVICE_ENDIAN_LITTLE;
1971    }
1972}
1973
1974static enum virtio_device_endian virtio_current_cpu_endian(void)
1975{
1976    CPUClass *cc = CPU_GET_CLASS(current_cpu);
1977
1978    if (cc->virtio_is_big_endian(current_cpu)) {
1979        return VIRTIO_DEVICE_ENDIAN_BIG;
1980    } else {
1981        return VIRTIO_DEVICE_ENDIAN_LITTLE;
1982    }
1983}
1984
1985void virtio_reset(void *opaque)
1986{
1987    VirtIODevice *vdev = opaque;
1988    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1989    int i;
1990
1991    virtio_set_status(vdev, 0);
1992    if (current_cpu) {
1993        /* Guest initiated reset */
1994        vdev->device_endian = virtio_current_cpu_endian();
1995    } else {
1996        /* System reset */
1997        vdev->device_endian = virtio_default_endian();
1998    }
1999
2000    if (k->reset) {
2001        k->reset(vdev);
2002    }
2003
2004    vdev->start_on_kick = false;
2005    vdev->started = false;
2006    vdev->broken = false;
2007    vdev->guest_features = 0;
2008    vdev->queue_sel = 0;
2009    vdev->status = 0;
2010    vdev->disabled = false;
2011    qatomic_set(&vdev->isr, 0);
2012    vdev->config_vector = VIRTIO_NO_VECTOR;
2013    virtio_notify_vector(vdev, vdev->config_vector);
2014
2015    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2016        vdev->vq[i].vring.desc = 0;
2017        vdev->vq[i].vring.avail = 0;
2018        vdev->vq[i].vring.used = 0;
2019        vdev->vq[i].last_avail_idx = 0;
2020        vdev->vq[i].shadow_avail_idx = 0;
2021        vdev->vq[i].used_idx = 0;
2022        vdev->vq[i].last_avail_wrap_counter = true;
2023        vdev->vq[i].shadow_avail_wrap_counter = true;
2024        vdev->vq[i].used_wrap_counter = true;
2025        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2026        vdev->vq[i].signalled_used = 0;
2027        vdev->vq[i].signalled_used_valid = false;
2028        vdev->vq[i].notification = true;
2029        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2030        vdev->vq[i].inuse = 0;
2031        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2032    }
2033}
2034
2035uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2036{
2037    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2038    uint8_t val;
2039
2040    if (addr + sizeof(val) > vdev->config_len) {
2041        return (uint32_t)-1;
2042    }
2043
2044    k->get_config(vdev, vdev->config);
2045
2046    val = ldub_p(vdev->config + addr);
2047    return val;
2048}
2049
2050uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2051{
2052    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2053    uint16_t val;
2054
2055    if (addr + sizeof(val) > vdev->config_len) {
2056        return (uint32_t)-1;
2057    }
2058
2059    k->get_config(vdev, vdev->config);
2060
2061    val = lduw_p(vdev->config + addr);
2062    return val;
2063}
2064
2065uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2066{
2067    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2068    uint32_t val;
2069
2070    if (addr + sizeof(val) > vdev->config_len) {
2071        return (uint32_t)-1;
2072    }
2073
2074    k->get_config(vdev, vdev->config);
2075
2076    val = ldl_p(vdev->config + addr);
2077    return val;
2078}
2079
2080void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2081{
2082    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2083    uint8_t val = data;
2084
2085    if (addr + sizeof(val) > vdev->config_len) {
2086        return;
2087    }
2088
2089    stb_p(vdev->config + addr, val);
2090
2091    if (k->set_config) {
2092        k->set_config(vdev, vdev->config);
2093    }
2094}
2095
2096void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2097{
2098    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2099    uint16_t val = data;
2100
2101    if (addr + sizeof(val) > vdev->config_len) {
2102        return;
2103    }
2104
2105    stw_p(vdev->config + addr, val);
2106
2107    if (k->set_config) {
2108        k->set_config(vdev, vdev->config);
2109    }
2110}
2111
2112void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2113{
2114    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2115    uint32_t val = data;
2116
2117    if (addr + sizeof(val) > vdev->config_len) {
2118        return;
2119    }
2120
2121    stl_p(vdev->config + addr, val);
2122
2123    if (k->set_config) {
2124        k->set_config(vdev, vdev->config);
2125    }
2126}
2127
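/*
 * Note: the virtio_config_*() accessors above use the guest's natural
 * (target) byte order, as legacy devices require, while the
 * virtio_config_modern_*() variants below always use little-endian
 * accesses as mandated for VIRTIO 1.0+ configuration space.
 */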
2128uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2129{
2130    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2131    uint8_t val;
2132
2133    if (addr + sizeof(val) > vdev->config_len) {
2134        return (uint32_t)-1;
2135    }
2136
2137    k->get_config(vdev, vdev->config);
2138
2139    val = ldub_p(vdev->config + addr);
2140    return val;
2141}
2142
2143uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2144{
2145    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2146    uint16_t val;
2147
2148    if (addr + sizeof(val) > vdev->config_len) {
2149        return (uint32_t)-1;
2150    }
2151
2152    k->get_config(vdev, vdev->config);
2153
2154    val = lduw_le_p(vdev->config + addr);
2155    return val;
2156}
2157
2158uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2159{
2160    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2161    uint32_t val;
2162
2163    if (addr + sizeof(val) > vdev->config_len) {
2164        return (uint32_t)-1;
2165    }
2166
2167    k->get_config(vdev, vdev->config);
2168
2169    val = ldl_le_p(vdev->config + addr);
2170    return val;
2171}
2172
2173void virtio_config_modern_writeb(VirtIODevice *vdev,
2174                                 uint32_t addr, uint32_t data)
2175{
2176    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2177    uint8_t val = data;
2178
2179    if (addr + sizeof(val) > vdev->config_len) {
2180        return;
2181    }
2182
2183    stb_p(vdev->config + addr, val);
2184
2185    if (k->set_config) {
2186        k->set_config(vdev, vdev->config);
2187    }
2188}
2189
2190void virtio_config_modern_writew(VirtIODevice *vdev,
2191                                 uint32_t addr, uint32_t data)
2192{
2193    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2194    uint16_t val = data;
2195
2196    if (addr + sizeof(val) > vdev->config_len) {
2197        return;
2198    }
2199
2200    stw_le_p(vdev->config + addr, val);
2201
2202    if (k->set_config) {
2203        k->set_config(vdev, vdev->config);
2204    }
2205}
2206
2207void virtio_config_modern_writel(VirtIODevice *vdev,
2208                                 uint32_t addr, uint32_t data)
2209{
2210    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2211    uint32_t val = data;
2212
2213    if (addr + sizeof(val) > vdev->config_len) {
2214        return;
2215    }
2216
2217    stl_le_p(vdev->config + addr, val);
2218
2219    if (k->set_config) {
2220        k->set_config(vdev, vdev->config);
2221    }
2222}
2223
2224void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2225{
2226    if (!vdev->vq[n].vring.num) {
2227        return;
2228    }
2229    vdev->vq[n].vring.desc = addr;
2230    virtio_queue_update_rings(vdev, n);
2231}
2232
2233hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2234{
2235    return vdev->vq[n].vring.desc;
2236}
2237
2238void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2239                            hwaddr avail, hwaddr used)
2240{
2241    if (!vdev->vq[n].vring.num) {
2242        return;
2243    }
2244    vdev->vq[n].vring.desc = desc;
2245    vdev->vq[n].vring.avail = avail;
2246    vdev->vq[n].vring.used = used;
2247    virtio_init_region_cache(vdev, n);
2248}
2249
2250void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2251{
2252    /* Don't allow guest to flip queue between existent and
2253     * nonexistent states, or to set it to an invalid size.
2254     */
2255    if (!!num != !!vdev->vq[n].vring.num ||
2256        num > VIRTQUEUE_MAX_SIZE ||
2257        num < 0) {
2258        return;
2259    }
2260    vdev->vq[n].vring.num = num;
2261}
2262
2263VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2264{
2265    return QLIST_FIRST(&vdev->vector_queues[vector]);
2266}
2267
2268VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2269{
2270    return QLIST_NEXT(vq, node);
2271}
2272
2273int virtio_queue_get_num(VirtIODevice *vdev, int n)
2274{
2275    return vdev->vq[n].vring.num;
2276}
2277
2278int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2279{
2280    return vdev->vq[n].vring.num_default;
2281}
2282
2283int virtio_get_num_queues(VirtIODevice *vdev)
2284{
2285    int i;
2286
2287    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2288        if (!virtio_queue_get_num(vdev, i)) {
2289            break;
2290        }
2291    }
2292
2293    return i;
2294}
2295
2296void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2297{
2298    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2299    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2300
2301    /* virtio-1 compliant devices cannot change the alignment */
2302    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2303        error_report("tried to modify queue alignment for virtio-1 device");
2304        return;
2305    }
2306    /* Check that the transport told us it was going to do this
2307     * (so a buggy transport will immediately assert rather than
2308     * silently failing to migrate this state)
2309     */
2310    assert(k->has_variable_vring_alignment);
2311
2312    if (align) {
2313        vdev->vq[n].vring.align = align;
2314        virtio_queue_update_rings(vdev, n);
2315    }
2316}
2317
2318static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2319{
2320    bool ret = false;
2321
2322    if (vq->vring.desc && vq->handle_aio_output) {
2323        VirtIODevice *vdev = vq->vdev;
2324
2325        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2326        ret = vq->handle_aio_output(vdev, vq);
2327
2328        if (unlikely(vdev->start_on_kick)) {
2329            virtio_set_started(vdev, true);
2330        }
2331    }
2332
2333    return ret;
2334}
2335
2336static void virtio_queue_notify_vq(VirtQueue *vq)
2337{
2338    if (vq->vring.desc && vq->handle_output) {
2339        VirtIODevice *vdev = vq->vdev;
2340
2341        if (unlikely(vdev->broken)) {
2342            return;
2343        }
2344
2345        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2346        vq->handle_output(vdev, vq);
2347
2348        if (unlikely(vdev->start_on_kick)) {
2349            virtio_set_started(vdev, true);
2350        }
2351    }
2352}
2353
2354void virtio_queue_notify(VirtIODevice *vdev, int n)
2355{
2356    VirtQueue *vq = &vdev->vq[n];
2357
2358    if (unlikely(!vq->vring.desc || vdev->broken)) {
2359        return;
2360    }
2361
2362    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2363    if (vq->host_notifier_enabled) {
2364        event_notifier_set(&vq->host_notifier);
2365    } else if (vq->handle_output) {
2366        vq->handle_output(vdev, vq);
2367
2368        if (unlikely(vdev->start_on_kick)) {
2369            virtio_set_started(vdev, true);
2370        }
2371    }
2372}
2373
2374uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2375{
2376    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2377        VIRTIO_NO_VECTOR;
2378}
2379
2380void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2381{
2382    VirtQueue *vq = &vdev->vq[n];
2383
2384    if (n < VIRTIO_QUEUE_MAX) {
2385        if (vdev->vector_queues &&
2386            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2387            QLIST_REMOVE(vq, node);
2388        }
2389        vdev->vq[n].vector = vector;
2390        if (vdev->vector_queues &&
2391            vector != VIRTIO_NO_VECTOR) {
2392            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2393        }
2394    }
2395}
2396
2397VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2398                            VirtIOHandleOutput handle_output)
2399{
2400    int i;
2401
2402    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2403        if (vdev->vq[i].vring.num == 0)
2404            break;
2405    }
2406
2407    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2408        abort();
2409
2410    vdev->vq[i].vring.num = queue_size;
2411    vdev->vq[i].vring.num_default = queue_size;
2412    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2413    vdev->vq[i].handle_output = handle_output;
2414    vdev->vq[i].handle_aio_output = NULL;
2415    vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2416                                       queue_size);
2417
2418    return &vdev->vq[i];
2419}
2420
2421void virtio_delete_queue(VirtQueue *vq)
2422{
2423    vq->vring.num = 0;
2424    vq->vring.num_default = 0;
2425    vq->handle_output = NULL;
2426    vq->handle_aio_output = NULL;
2427    g_free(vq->used_elems);
2428    vq->used_elems = NULL;
2429    virtio_virtqueue_reset_region_cache(vq);
2430}
2431
2432void virtio_del_queue(VirtIODevice *vdev, int n)
2433{
2434    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2435        abort();
2436    }
2437
2438    virtio_delete_queue(&vdev->vq[n]);
2439}
2440
2441static void virtio_set_isr(VirtIODevice *vdev, int value)
2442{
2443    uint8_t old = qatomic_read(&vdev->isr);
2444
2445    /* Do not write ISR if it does not change, so that its cacheline remains
2446     * shared in the common case where the guest does not read it.
2447     */
2448    if ((old & value) != value) {
2449        qatomic_or(&vdev->isr, value);
2450    }
2451}
2452
2453static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2454{
2455    uint16_t old, new;
2456    bool v;
2457    /* We need to expose used array entries before checking used event. */
2458    smp_mb();
2459    /* Always notify when queue is empty (if the feature was acknowledged) */
2460    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2461        !vq->inuse && virtio_queue_empty(vq)) {
2462        return true;
2463    }
2464
2465    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2466        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2467    }
2468
2469    v = vq->signalled_used_valid;
2470    vq->signalled_used_valid = true;
2471    old = vq->signalled_used;
2472    new = vq->signalled_used = vq->used_idx;
2473    return !v || vring_need_event(vring_get_used_event(vq), new, old);
2474}
2475
2476static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2477                                    uint16_t off_wrap, uint16_t new,
2478                                    uint16_t old)
2479{
2480    int off = off_wrap & ~(1 << 15);
2481
2482    if (wrap != off_wrap >> 15) {
2483        off -= vq->vring.num;
2484    }
2485
2486    return vring_need_event(off, new, old);
2487}
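/*
 * Worked example (illustrative): with vring.num = 256, an off_wrap value of
 * 0x8005 encodes event offset 5 with the driver's wrap bit set.  If the
 * device's used wrap counter ("wrap" above) disagrees with that bit, the
 * offset is shifted down by vring.num so that vring_need_event() compares
 * indices within the same wrap period.
 */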
2488
2489static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2490{
2491    VRingPackedDescEvent e;
2492    uint16_t old, new;
2493    bool v;
2494    VRingMemoryRegionCaches *caches;
2495
2496    caches = vring_get_region_caches(vq);
2497    if (!caches) {
2498        return false;
2499    }
2500
2501    vring_packed_event_read(vdev, &caches->avail, &e);
2502
2503    old = vq->signalled_used;
2504    new = vq->signalled_used = vq->used_idx;
2505    v = vq->signalled_used_valid;
2506    vq->signalled_used_valid = true;
2507
2508    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2509        return false;
2510    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2511        return true;
2512    }
2513
2514    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2515                                         e.off_wrap, new, old);
2516}
2517
2518/* Called within rcu_read_lock().  */
2519static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2520{
2521    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2522        return virtio_packed_should_notify(vdev, vq);
2523    } else {
2524        return virtio_split_should_notify(vdev, vq);
2525    }
2526}
2527
2528void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2529{
2530    WITH_RCU_READ_LOCK_GUARD() {
2531        if (!virtio_should_notify(vdev, vq)) {
2532            return;
2533        }
2534    }
2535
2536    trace_virtio_notify_irqfd(vdev, vq);
2537
2538    /*
2539     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2540     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2541     * incorrectly polling this bit during crashdump and hibernation
2542     * in MSI mode, causing a hang if this bit is never updated.
2543     * Recent releases of Windows do not really shut down, but rather
2544     * log out and hibernate to make the next startup faster.  Hence,
2545     * this manifested as a more serious hang during shutdown with these
2546     * drivers.
2547     * The next driver release, from 2016, fixed this, so working around it
2548     * is not a must, but it is easy to do, so let's do it here.
2549     *
2550     * Note: it's safe to update ISR from any thread as it was switched
2551     * to an atomic operation.
2552     */
2553    virtio_set_isr(vq->vdev, 0x1);
2554    event_notifier_set(&vq->guest_notifier);
2555}
2556
2557static void virtio_irq(VirtQueue *vq)
2558{
2559    virtio_set_isr(vq->vdev, 0x1);
2560    virtio_notify_vector(vq->vdev, vq->vector);
2561}
2562
2563void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2564{
2565    WITH_RCU_READ_LOCK_GUARD() {
2566        if (!virtio_should_notify(vdev, vq)) {
2567            return;
2568        }
2569    }
2570
2571    trace_virtio_notify(vdev, vq);
2572    virtio_irq(vq);
2573}
2574
2575void virtio_notify_config(VirtIODevice *vdev)
2576{
2577    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2578        return;
2579
2580    virtio_set_isr(vdev, 0x3);
2581    vdev->generation++;
2582    virtio_notify_vector(vdev, vdev->config_vector);
2583}
2584
2585static bool virtio_device_endian_needed(void *opaque)
2586{
2587    VirtIODevice *vdev = opaque;
2588
2589    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2590    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2591        return vdev->device_endian != virtio_default_endian();
2592    }
2593    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2594    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2595}
2596
2597static bool virtio_64bit_features_needed(void *opaque)
2598{
2599    VirtIODevice *vdev = opaque;
2600
2601    return (vdev->host_features >> 32) != 0;
2602}
2603
2604static bool virtio_virtqueue_needed(void *opaque)
2605{
2606    VirtIODevice *vdev = opaque;
2607
2608    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2609}
2610
2611static bool virtio_packed_virtqueue_needed(void *opaque)
2612{
2613    VirtIODevice *vdev = opaque;
2614
2615    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2616}
2617
2618static bool virtio_ringsize_needed(void *opaque)
2619{
2620    VirtIODevice *vdev = opaque;
2621    int i;
2622
2623    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2624        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2625            return true;
2626        }
2627    }
2628    return false;
2629}
2630
2631static bool virtio_extra_state_needed(void *opaque)
2632{
2633    VirtIODevice *vdev = opaque;
2634    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2635    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2636
2637    return k->has_extra_state &&
2638        k->has_extra_state(qbus->parent);
2639}
2640
2641static bool virtio_broken_needed(void *opaque)
2642{
2643    VirtIODevice *vdev = opaque;
2644
2645    return vdev->broken;
2646}
2647
2648static bool virtio_started_needed(void *opaque)
2649{
2650    VirtIODevice *vdev = opaque;
2651
2652    return vdev->started;
2653}
2654
2655static bool virtio_disabled_needed(void *opaque)
2656{
2657    VirtIODevice *vdev = opaque;
2658
2659    return vdev->disabled;
2660}
2661
2662static const VMStateDescription vmstate_virtqueue = {
2663    .name = "virtqueue_state",
2664    .version_id = 1,
2665    .minimum_version_id = 1,
2666    .fields = (VMStateField[]) {
2667        VMSTATE_UINT64(vring.avail, struct VirtQueue),
2668        VMSTATE_UINT64(vring.used, struct VirtQueue),
2669        VMSTATE_END_OF_LIST()
2670    }
2671};
2672
2673static const VMStateDescription vmstate_packed_virtqueue = {
2674    .name = "packed_virtqueue_state",
2675    .version_id = 1,
2676    .minimum_version_id = 1,
2677    .fields = (VMStateField[]) {
2678        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2679        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2680        VMSTATE_UINT16(used_idx, struct VirtQueue),
2681        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2682        VMSTATE_UINT32(inuse, struct VirtQueue),
2683        VMSTATE_END_OF_LIST()
2684    }
2685};
2686
2687static const VMStateDescription vmstate_virtio_virtqueues = {
2688    .name = "virtio/virtqueues",
2689    .version_id = 1,
2690    .minimum_version_id = 1,
2691    .needed = &virtio_virtqueue_needed,
2692    .fields = (VMStateField[]) {
2693        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2694                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2695        VMSTATE_END_OF_LIST()
2696    }
2697};
2698
2699static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2700    .name = "virtio/packed_virtqueues",
2701    .version_id = 1,
2702    .minimum_version_id = 1,
2703    .needed = &virtio_packed_virtqueue_needed,
2704    .fields = (VMStateField[]) {
2705        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2706                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2707        VMSTATE_END_OF_LIST()
2708    }
2709};
2710
2711static const VMStateDescription vmstate_ringsize = {
2712    .name = "ringsize_state",
2713    .version_id = 1,
2714    .minimum_version_id = 1,
2715    .fields = (VMStateField[]) {
2716        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2717        VMSTATE_END_OF_LIST()
2718    }
2719};
2720
2721static const VMStateDescription vmstate_virtio_ringsize = {
2722    .name = "virtio/ringsize",
2723    .version_id = 1,
2724    .minimum_version_id = 1,
2725    .needed = &virtio_ringsize_needed,
2726    .fields = (VMStateField[]) {
2727        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2728                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2729        VMSTATE_END_OF_LIST()
2730    }
2731};
2732
2733static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2734                           const VMStateField *field)
2735{
2736    VirtIODevice *vdev = pv;
2737    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2738    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2739
2740    if (!k->load_extra_state) {
2741        return -1;
2742    } else {
2743        return k->load_extra_state(qbus->parent, f);
2744    }
2745}
2746
2747static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2748                           const VMStateField *field, JSONWriter *vmdesc)
2749{
2750    VirtIODevice *vdev = pv;
2751    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2752    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2753
2754    k->save_extra_state(qbus->parent, f);
2755    return 0;
2756}
2757
2758static const VMStateInfo vmstate_info_extra_state = {
2759    .name = "virtqueue_extra_state",
2760    .get = get_extra_state,
2761    .put = put_extra_state,
2762};
2763
2764static const VMStateDescription vmstate_virtio_extra_state = {
2765    .name = "virtio/extra_state",
2766    .version_id = 1,
2767    .minimum_version_id = 1,
2768    .needed = &virtio_extra_state_needed,
2769    .fields = (VMStateField[]) {
2770        {
2771            .name         = "extra_state",
2772            .version_id   = 0,
2773            .field_exists = NULL,
2774            .size         = 0,
2775            .info         = &vmstate_info_extra_state,
2776            .flags        = VMS_SINGLE,
2777            .offset       = 0,
2778        },
2779        VMSTATE_END_OF_LIST()
2780    }
2781};
2782
2783static const VMStateDescription vmstate_virtio_device_endian = {
2784    .name = "virtio/device_endian",
2785    .version_id = 1,
2786    .minimum_version_id = 1,
2787    .needed = &virtio_device_endian_needed,
2788    .fields = (VMStateField[]) {
2789        VMSTATE_UINT8(device_endian, VirtIODevice),
2790        VMSTATE_END_OF_LIST()
2791    }
2792};
2793
2794static const VMStateDescription vmstate_virtio_64bit_features = {
2795    .name = "virtio/64bit_features",
2796    .version_id = 1,
2797    .minimum_version_id = 1,
2798    .needed = &virtio_64bit_features_needed,
2799    .fields = (VMStateField[]) {
2800        VMSTATE_UINT64(guest_features, VirtIODevice),
2801        VMSTATE_END_OF_LIST()
2802    }
2803};
2804
2805static const VMStateDescription vmstate_virtio_broken = {
2806    .name = "virtio/broken",
2807    .version_id = 1,
2808    .minimum_version_id = 1,
2809    .needed = &virtio_broken_needed,
2810    .fields = (VMStateField[]) {
2811        VMSTATE_BOOL(broken, VirtIODevice),
2812        VMSTATE_END_OF_LIST()
2813    }
2814};
2815
2816static const VMStateDescription vmstate_virtio_started = {
2817    .name = "virtio/started",
2818    .version_id = 1,
2819    .minimum_version_id = 1,
2820    .needed = &virtio_started_needed,
2821    .fields = (VMStateField[]) {
2822        VMSTATE_BOOL(started, VirtIODevice),
2823        VMSTATE_END_OF_LIST()
2824    }
2825};
2826
2827static const VMStateDescription vmstate_virtio_disabled = {
2828    .name = "virtio/disabled",
2829    .version_id = 1,
2830    .minimum_version_id = 1,
2831    .needed = &virtio_disabled_needed,
2832    .fields = (VMStateField[]) {
2833        VMSTATE_BOOL(disabled, VirtIODevice),
2834        VMSTATE_END_OF_LIST()
2835    }
2836};
2837
2838static const VMStateDescription vmstate_virtio = {
2839    .name = "virtio",
2840    .version_id = 1,
2841    .minimum_version_id = 1,
2842    .minimum_version_id_old = 1,
2843    .fields = (VMStateField[]) {
2844        VMSTATE_END_OF_LIST()
2845    },
2846    .subsections = (const VMStateDescription*[]) {
2847        &vmstate_virtio_device_endian,
2848        &vmstate_virtio_64bit_features,
2849        &vmstate_virtio_virtqueues,
2850        &vmstate_virtio_ringsize,
2851        &vmstate_virtio_broken,
2852        &vmstate_virtio_extra_state,
2853        &vmstate_virtio_started,
2854        &vmstate_virtio_packed_virtqueues,
2855        &vmstate_virtio_disabled,
2856        NULL
2857    }
2858};
2859
2860int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2861{
2862    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2863    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2864    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2865    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2866    int i;
2867
2868    if (k->save_config) {
2869        k->save_config(qbus->parent, f);
2870    }
2871
2872    qemu_put_8s(f, &vdev->status);
2873    qemu_put_8s(f, &vdev->isr);
2874    qemu_put_be16s(f, &vdev->queue_sel);
2875    qemu_put_be32s(f, &guest_features_lo);
2876    qemu_put_be32(f, vdev->config_len);
2877    qemu_put_buffer(f, vdev->config, vdev->config_len);
2878
2879    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2880        if (vdev->vq[i].vring.num == 0)
2881            break;
2882    }
2883
2884    qemu_put_be32(f, i);
2885
2886    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2887        if (vdev->vq[i].vring.num == 0)
2888            break;
2889
2890        qemu_put_be32(f, vdev->vq[i].vring.num);
2891        if (k->has_variable_vring_alignment) {
2892            qemu_put_be32(f, vdev->vq[i].vring.align);
2893        }
2894        /*
2895         * Save desc now, the rest of the ring addresses are saved in
2896         * subsections for VIRTIO-1 devices.
2897         */
2898        qemu_put_be64(f, vdev->vq[i].vring.desc);
2899        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2900        if (k->save_queue) {
2901            k->save_queue(qbus->parent, i, f);
2902        }
2903    }
2904
2905    if (vdc->save != NULL) {
2906        vdc->save(vdev, f);
2907    }
2908
2909    if (vdc->vmsd) {
2910        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2911        if (ret) {
2912            return ret;
2913        }
2914    }
2915
2916    /* Subsections */
2917    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2918}
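/*
 * For reference, the wire format produced above is: transport config,
 * status, isr, queue_sel, the low 32 feature bits, config_len plus the
 * config bytes, the number of in-use queues, then for each queue
 * num/[align]/desc/last_avail_idx plus any transport queue state,
 * followed by the device's own state and the "virtio" vmstate subsections.
 */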
2919
2920/* A wrapper for use as a VMState .put function */
2921static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2922                              const VMStateField *field, JSONWriter *vmdesc)
2923{
2924    return virtio_save(VIRTIO_DEVICE(opaque), f);
2925}
2926
2927/* A wrapper for use as a VMState .get function */
2928static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2929                             const VMStateField *field)
2930{
2931    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2932    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2933
2934    return virtio_load(vdev, f, dc->vmsd->version_id);
2935}
2936
2937const VMStateInfo  virtio_vmstate_info = {
2938    .name = "virtio",
2939    .get = virtio_device_get,
2940    .put = virtio_device_put,
2941};
2942
2943static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2944{
2945    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2946    bool bad = (val & ~(vdev->host_features)) != 0;
2947
2948    val &= vdev->host_features;
2949    if (k->set_features) {
2950        k->set_features(vdev, val);
2951    }
2952    vdev->guest_features = val;
2953    return bad ? -1 : 0;
2954}
2955
2956int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2957{
2958    int ret;
2959    /*
2960     * The driver must not attempt to set features after feature negotiation
2961     * has finished.
2962     */
2963    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2964        return -EINVAL;
2965    }
2966    ret = virtio_set_features_nocheck(vdev, val);
2967    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2968        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2969        int i;
2970        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2971            if (vdev->vq[i].vring.num != 0) {
2972                virtio_init_region_cache(vdev, i);
2973            }
2974        }
2975    }
2976    if (!ret) {
2977        if (!virtio_device_started(vdev, vdev->status) &&
2978            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2979            vdev->start_on_kick = true;
2980        }
2981    }
2982    return ret;
2983}
2984
2985size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes,
2986                                      uint64_t host_features)
2987{
2988    size_t config_size = 0;
2989    int i;
2990
2991    for (i = 0; feature_sizes[i].flags != 0; i++) {
2992        if (host_features & feature_sizes[i].flags) {
2993            config_size = MAX(feature_sizes[i].end, config_size);
2994        }
2995    }
2996
2997    return config_size;
2998}
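/*
 * Illustrative use (all "example_" names and the feature bit are
 * hypothetical): a device describes the feature-dependent tail of its
 * config space once and lets this helper pick the largest size required
 * by the features it offers.
 */
#if 0
#define EXAMPLE_F_EXTRA 40    /* hypothetical feature bit */

struct example_config {
    uint32_t base;
    uint32_t extra;    /* only meaningful when EXAMPLE_F_EXTRA is offered */
};

static VirtIOFeature example_feature_sizes[] = {
    {.flags = 1ULL << EXAMPLE_F_EXTRA,
     .end = offsetof(struct example_config, extra) + sizeof(uint32_t)},
    {}
};

static size_t example_config_size(uint64_t host_features)
{
    return virtio_feature_get_config_size(example_feature_sizes,
                                          host_features);
}
#endif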
2999
3000int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3001{
3002    int i, ret;
3003    int32_t config_len;
3004    uint32_t num;
3005    uint32_t features;
3006    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3007    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3008    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3009
3010    /*
3011     * We poison the endianness to ensure it does not get used before
3012     * subsections have been loaded.
3013     */
3014    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3015
3016    if (k->load_config) {
3017        ret = k->load_config(qbus->parent, f);
3018        if (ret)
3019            return ret;
3020    }
3021
3022    qemu_get_8s(f, &vdev->status);
3023    qemu_get_8s(f, &vdev->isr);
3024    qemu_get_be16s(f, &vdev->queue_sel);
3025    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3026        return -1;
3027    }
3028    qemu_get_be32s(f, &features);
3029
3030    /*
3031     * Temporarily set guest_features low bits - needed by
3032     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
3033     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3034     *
3035     * Note: devices should always test host features in future - don't create
3036     * new dependencies like this.
3037     */
3038    vdev->guest_features = features;
3039
3040    config_len = qemu_get_be32(f);
3041
3042    /*
3043     * There are cases where the incoming config can be bigger or smaller
3044     * than what we have; so load what we have space for, and skip
3045     * any excess that's in the stream.
3046     */
3047    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3048
3049    while (config_len > vdev->config_len) {
3050        qemu_get_byte(f);
3051        config_len--;
3052    }
3053
3054    num = qemu_get_be32(f);
3055
3056    if (num > VIRTIO_QUEUE_MAX) {
3057        error_report("Invalid number of virtqueues: 0x%x", num);
3058        return -1;
3059    }
3060
3061    for (i = 0; i < num; i++) {
3062        vdev->vq[i].vring.num = qemu_get_be32(f);
3063        if (k->has_variable_vring_alignment) {
3064            vdev->vq[i].vring.align = qemu_get_be32(f);
3065        }
3066        vdev->vq[i].vring.desc = qemu_get_be64(f);
3067        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3068        vdev->vq[i].signalled_used_valid = false;
3069        vdev->vq[i].notification = true;
3070
3071        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3072            error_report("VQ %d address 0x0 "
3073                         "inconsistent with Host index 0x%x",
3074                         i, vdev->vq[i].last_avail_idx);
3075            return -1;
3076        }
3077        if (k->load_queue) {
3078            ret = k->load_queue(qbus->parent, i, f);
3079            if (ret)
3080                return ret;
3081        }
3082    }
3083
3084    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3085
3086    if (vdc->load != NULL) {
3087        ret = vdc->load(vdev, f, version_id);
3088        if (ret) {
3089            return ret;
3090        }
3091    }
3092
3093    if (vdc->vmsd) {
3094        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3095        if (ret) {
3096            return ret;
3097        }
3098    }
3099
3100    /* Subsections */
3101    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3102    if (ret) {
3103        return ret;
3104    }
3105
3106    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3107        vdev->device_endian = virtio_default_endian();
3108    }
3109
3110    if (virtio_64bit_features_needed(vdev)) {
3111        /*
3112         * Subsection load filled vdev->guest_features.  Run them
3113         * through virtio_set_features to sanity-check them against
3114         * host_features.
3115         */
3116        uint64_t features64 = vdev->guest_features;
3117        if (virtio_set_features_nocheck(vdev, features64) < 0) {
3118            error_report("Features 0x%" PRIx64 " unsupported. "
3119                         "Allowed features: 0x%" PRIx64,
3120                         features64, vdev->host_features);
3121            return -1;
3122        }
3123    } else {
3124        if (virtio_set_features_nocheck(vdev, features) < 0) {
3125            error_report("Features 0x%x unsupported. "
3126                         "Allowed features: 0x%" PRIx64,
3127                         features, vdev->host_features);
3128            return -1;
3129        }
3130    }
3131
3132    if (!virtio_device_started(vdev, vdev->status) &&
3133        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3134        vdev->start_on_kick = true;
3135    }
3136
3137    RCU_READ_LOCK_GUARD();
3138    for (i = 0; i < num; i++) {
3139        if (vdev->vq[i].vring.desc) {
3140            uint16_t nheads;
3141
3142            /*
3143             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3144             * only the region cache needs to be set up.  Legacy devices need
3145             * to calculate used and avail ring addresses based on the desc
3146             * address.
3147             */
3148            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3149                virtio_init_region_cache(vdev, i);
3150            } else {
3151                virtio_queue_update_rings(vdev, i);
3152            }
3153
3154            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3155                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3156                vdev->vq[i].shadow_avail_wrap_counter =
3157                                        vdev->vq[i].last_avail_wrap_counter;
3158                continue;
3159            }
3160
3161            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3162            /* Check it isn't doing strange things with descriptor numbers. */
3163            if (nheads > vdev->vq[i].vring.num) {
3164                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3165                             "inconsistent with Host index 0x%x: delta 0x%x",
3166                             i, vdev->vq[i].vring.num,
3167                             vring_avail_idx(&vdev->vq[i]),
3168                             vdev->vq[i].last_avail_idx, nheads);
3169                vdev->vq[i].used_idx = 0;
3170                vdev->vq[i].shadow_avail_idx = 0;
3171                vdev->vq[i].inuse = 0;
3172                continue;
3173            }
3174            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3175            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3176
3177            /*
3178             * Some devices migrate VirtQueueElements that have been popped
3179             * from the avail ring but not yet returned to the used ring.
3180             * Since max ring size < UINT16_MAX it's safe to use modulo
3181             * UINT16_MAX + 1 subtraction.
3182             */
3183            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3184                                vdev->vq[i].used_idx);
3185            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3186                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3187                             "used_idx 0x%x",
3188                             i, vdev->vq[i].vring.num,
3189                             vdev->vq[i].last_avail_idx,
3190                             vdev->vq[i].used_idx);
3191                return -1;
3192            }
3193        }
3194    }
3195
3196    if (vdc->post_load) {
3197        ret = vdc->post_load(vdev);
3198        if (ret) {
3199            return ret;
3200        }
3201    }
3202
3203    return 0;
3204}
3205
3206void virtio_cleanup(VirtIODevice *vdev)
3207{
3208    qemu_del_vm_change_state_handler(vdev->vmstate);
3209}
3210
3211static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3212{
3213    VirtIODevice *vdev = opaque;
3214    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3215    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3216    bool backend_run = running && virtio_device_started(vdev, vdev->status);
3217    vdev->vm_running = running;
3218
3219    if (backend_run) {
3220        virtio_set_status(vdev, vdev->status);
3221    }
3222
3223    if (k->vmstate_change) {
3224        k->vmstate_change(qbus->parent, backend_run);
3225    }
3226
3227    if (!backend_run) {
3228        virtio_set_status(vdev, vdev->status);
3229    }
3230}
3231
3232void virtio_instance_init_common(Object *proxy_obj, void *data,
3233                                 size_t vdev_size, const char *vdev_name)
3234{
3235    DeviceState *vdev = data;
3236
3237    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3238                                       vdev_size, vdev_name, &error_abort,
3239                                       NULL);
3240    qdev_alias_all_properties(vdev, proxy_obj);
3241}
3242
3243void virtio_init(VirtIODevice *vdev, const char *name,
3244                 uint16_t device_id, size_t config_size)
3245{
3246    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3247    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3248    int i;
3249    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3250
3251    if (nvectors) {
3252        vdev->vector_queues =
3253            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3254    }
3255
3256    vdev->start_on_kick = false;
3257    vdev->started = false;
3258    vdev->device_id = device_id;
3259    vdev->status = 0;
3260    qatomic_set(&vdev->isr, 0);
3261    vdev->queue_sel = 0;
3262    vdev->config_vector = VIRTIO_NO_VECTOR;
3263    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3264    vdev->vm_running = runstate_is_running();
3265    vdev->broken = false;
3266    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3267        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3268        vdev->vq[i].vdev = vdev;
3269        vdev->vq[i].queue_index = i;
3270        vdev->vq[i].host_notifier_enabled = false;
3271    }
3272
3273    vdev->name = name;
3274    vdev->config_len = config_size;
3275    if (vdev->config_len) {
3276        vdev->config = g_malloc0(config_size);
3277    } else {
3278        vdev->config = NULL;
3279    }
3280    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3281            virtio_vmstate_change, vdev);
3282    vdev->device_endian = virtio_default_endian();
3283    vdev->use_guest_notifier_mask = true;
3284}
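/*
 * Illustrative sketch (not part of this file; the "example_" names and the
 * device ID value are hypothetical): a minimal device typically calls
 * virtio_init() and virtio_add_queue() from its realize handler and tears
 * the queues down again with virtio_del_queue()/virtio_cleanup().
 */
#if 0
#define EXAMPLE_DEVICE_ID 0xffff    /* hypothetical, not an assigned ID */

static void example_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
{
    /* drain vq with virtqueue_pop()/virtqueue_push() as sketched earlier */
}

static void example_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);

    virtio_init(vdev, "example", EXAMPLE_DEVICE_ID, 0 /* no config space */);
    virtio_add_queue(vdev, 128, example_handle_tx);
}
#endif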
3285
3286/*
3287 * Only devices that have already been around prior to defining the virtio
3288 * standard support legacy mode; this includes devices not specified in the
3289 * standard. All newer devices conform to the virtio standard only.
3290 */
3291bool virtio_legacy_allowed(VirtIODevice *vdev)
3292{
3293    switch (vdev->device_id) {
3294    case VIRTIO_ID_NET:
3295    case VIRTIO_ID_BLOCK:
3296    case VIRTIO_ID_CONSOLE:
3297    case VIRTIO_ID_RNG:
3298    case VIRTIO_ID_BALLOON:
3299    case VIRTIO_ID_RPMSG:
3300    case VIRTIO_ID_SCSI:
3301    case VIRTIO_ID_9P:
3302    case VIRTIO_ID_RPROC_SERIAL:
3303    case VIRTIO_ID_CAIF:
3304        return true;
3305    default:
3306        return false;
3307    }
3308}
3309
3310bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3311{
3312    return vdev->disable_legacy_check;
3313}
3314
3315hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3316{
3317    return vdev->vq[n].vring.desc;
3318}
3319
3320bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3321{
3322    return virtio_queue_get_desc_addr(vdev, n) != 0;
3323}
3324
3325bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3326{
3327    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3328    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3329
3330    if (k->queue_enabled) {
3331        return k->queue_enabled(qbus->parent, n);
3332    }
3333    return virtio_queue_enabled_legacy(vdev, n);
3334}
3335
3336hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3337{
3338    return vdev->vq[n].vring.avail;
3339}
3340
3341hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3342{
3343    return vdev->vq[n].vring.used;
3344}
3345
3346hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3347{
3348    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3349}
3350
3351hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3352{
3353    int s;
3354
3355    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3356        return sizeof(struct VRingPackedDescEvent);
3357    }
3358
3359    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3360    return offsetof(VRingAvail, ring) +
3361        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3362}
3363
3364hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3365{
3366    int s;
3367
3368    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3369        return sizeof(struct VRingPackedDescEvent);
3370    }
3371
3372    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3373    return offsetof(VRingUsed, ring) +
3374        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3375}
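/*
 * Worked example (illustrative): for a split ring with num = 256 and
 * VIRTIO_RING_F_EVENT_IDX negotiated, the avail area is
 * 4 + 2 * 256 + 2 = 518 bytes and the used area is 4 + 8 * 256 + 2 = 2054
 * bytes; packed rings instead reserve only the 4-byte driver and device
 * event structures here.
 */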
3376
3377static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3378                                                           int n)
3379{
3380    unsigned int avail, used;
3381
3382    avail = vdev->vq[n].last_avail_idx;
3383    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3384
3385    used = vdev->vq[n].used_idx;
3386    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3387
3388    return avail | used << 16;
3389}
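/*
 * For reference: the 32-bit value above packs bits [14:0] = last_avail_idx,
 * bit 15 = last_avail_wrap_counter, bits [30:16] = used_idx and
 * bit 31 = used_wrap_counter; virtio_queue_packed_set_last_avail_idx()
 * unpacks it with the matching 0x7fff/0x8000 masks.
 */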
3390
3391static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3392                                                      int n)
3393{
3394    return vdev->vq[n].last_avail_idx;
3395}
3396
3397unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3398{
3399    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3400        return virtio_queue_packed_get_last_avail_idx(vdev, n);
3401    } else {
3402        return virtio_queue_split_get_last_avail_idx(vdev, n);
3403    }
3404}
3405
3406static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3407                                                   int n, unsigned int idx)
3408{
3409    struct VirtQueue *vq = &vdev->vq[n];
3410
3411    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3412    vq->last_avail_wrap_counter =
3413        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3414    idx >>= 16;
3415    vq->used_idx = idx & 0x7fff;
3416    vq->used_wrap_counter = !!(idx & 0x8000);
3417}
3418
3419static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3420                                                  int n, unsigned int idx)
3421{
3422    vdev->vq[n].last_avail_idx = idx;
3423    vdev->vq[n].shadow_avail_idx = idx;
3424}
3425
3426void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3427                                     unsigned int idx)
3428{
3429    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3430        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3431    } else {
3432        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3433    }
3434}
3435
3436static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3437                                                       int n)
3438{
3439    /* We don't have a reference like avail idx in shared memory */
3440    return;
3441}
3442
3443static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3444                                                      int n)
3445{
3446    RCU_READ_LOCK_GUARD();
3447    if (vdev->vq[n].vring.desc) {
3448        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3449        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3450    }
3451}
3452
3453void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3454{
3455    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3456        virtio_queue_packed_restore_last_avail_idx(vdev, n);
3457    } else {
3458        virtio_queue_split_restore_last_avail_idx(vdev, n);
3459    }
3460}
3461
3462static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3463{
3464    /* used idx was updated through set_last_avail_idx() */
3465    return;
3466}
3467
3468static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3469{
3470    RCU_READ_LOCK_GUARD();
3471    if (vdev->vq[n].vring.desc) {
3472        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3473    }
3474}
3475
3476void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3477{
3478    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3479        virtio_queue_packed_update_used_idx(vdev, n);
3480    } else {
3481        virtio_queue_split_update_used_idx(vdev, n);
3482    }
3483}
3484
3485void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3486{
3487    vdev->vq[n].signalled_used_valid = false;
3488}
3489
3490VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3491{
3492    return vdev->vq + n;
3493}
3494
3495uint16_t virtio_get_queue_index(VirtQueue *vq)
3496{
3497    return vq->queue_index;
3498}
3499
3500static void virtio_queue_guest_notifier_read(EventNotifier *n)
3501{
3502    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3503    if (event_notifier_test_and_clear(n)) {
3504        virtio_irq(vq);
3505    }
3506}
3507
3508void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3509                                                bool with_irqfd)
3510{
3511    if (assign && !with_irqfd) {
3512        event_notifier_set_handler(&vq->guest_notifier,
3513                                   virtio_queue_guest_notifier_read);
3514    } else {
3515        event_notifier_set_handler(&vq->guest_notifier, NULL);
3516    }
3517    if (!assign) {
3518        /* Test and clear notifier before closing it,
3519         * in case poll callback didn't have time to run. */
3520        virtio_queue_guest_notifier_read(&vq->guest_notifier);
3521    }
3522}
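
/*
 * Illustrative only: a transport that cannot route interrupts through an
 * irqfd typically installs the fd handler around notifier setup, e.g. with a
 * hypothetical caller:
 *
 *     EventNotifier *gn = virtio_queue_get_guest_notifier(vq);
 *     event_notifier_init(gn, 0);
 *     virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
 *     ...
 *     virtio_queue_set_guest_notifier_fd_handler(vq, false, false);
 *     event_notifier_cleanup(gn);
 */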
3523
3524EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3525{
3526    return &vq->guest_notifier;
3527}
3528
3529static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3530{
3531    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3532    if (event_notifier_test_and_clear(n)) {
3533        virtio_queue_notify_aio_vq(vq);
3534    }
3535}
3536
3537static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3538{
3539    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3540
3541    virtio_queue_set_notification(vq, 0);
3542}
3543
3544static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3545{
3546    EventNotifier *n = opaque;
3547    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3548
3549    if (!vq->vring.desc || virtio_queue_empty(vq)) {
3550        return false;
3551    }
3552
3553    return virtio_queue_notify_aio_vq(vq);
3554}
3555
3556static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3557{
3558    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3559
3560    /* Caller polls once more after this to catch requests that race with us */
3561    virtio_queue_set_notification(vq, 1);
3562}
3563
3564void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3565                                                VirtIOHandleAIOOutput handle_output)
3566{
3567    if (handle_output) {
3568        vq->handle_aio_output = handle_output;
3569        aio_set_event_notifier(ctx, &vq->host_notifier, true,
3570                               virtio_queue_host_notifier_aio_read,
3571                               virtio_queue_host_notifier_aio_poll);
3572        aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3573                                    virtio_queue_host_notifier_aio_poll_begin,
3574                                    virtio_queue_host_notifier_aio_poll_end);
3575    } else {
3576        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3577        /* Test and clear notifier after disabling the event,
3578         * in case poll callback didn't have time to run. */
3579        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3580        vq->handle_aio_output = NULL;
3581    }
3582}
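
/*
 * Illustrative only: a dataplane-style device would typically hand its queue
 * over to an IOThread's AioContext roughly like this (the handler name is
 * hypothetical), and detach again by passing NULL before stopping ioeventfd:
 *
 *     aio_context_acquire(ctx);
 *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, my_handle_output);
 *     aio_context_release(ctx);
 */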
3583
3584void virtio_queue_host_notifier_read(EventNotifier *n)
3585{
3586    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3587    if (event_notifier_test_and_clear(n)) {
3588        virtio_queue_notify_vq(vq);
3589    }
3590}
3591
3592EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3593{
3594    return &vq->host_notifier;
3595}
3596
3597void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3598{
3599    vq->host_notifier_enabled = enabled;
3600}
3601
3602int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3603                                      MemoryRegion *mr, bool assign)
3604{
3605    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3606    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3607
3608    if (k->set_host_notifier_mr) {
3609        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3610    }
3611
3612    return -1;
3613}
3614
3615void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3616{
3617    g_free(vdev->bus_name);
3618    vdev->bus_name = g_strdup(bus_name);
3619}
3620
3621void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3622{
3623    va_list ap;
3624
3625    va_start(ap, fmt);
3626    error_vreport(fmt, ap);
3627    va_end(ap);
3628
3629    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3630        vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
3631        virtio_notify_config(vdev);
3632    }
3633
3634    vdev->broken = true;
3635}
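
/*
 * Typical usage is a printf-style report of a guest-visible protocol
 * violation, e.g. (illustrative values):
 *
 *     virtio_error(vdev, "Descriptor index %u out of range (num %u)",
 *                  head, vq->vring.num);
 *
 * For VIRTIO 1.0 devices this sets NEEDS_RESET and raises a config interrupt;
 * legacy devices are only marked broken.
 */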
3636
3637static void virtio_memory_listener_commit(MemoryListener *listener)
3638{
3639    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3640    int i;
3641
3642    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3643        if (vdev->vq[i].vring.num == 0) {
3644            break;
3645        }
3646        virtio_init_region_cache(vdev, i);
3647    }
3648}
3649
3650static void virtio_device_realize(DeviceState *dev, Error **errp)
3651{
3652    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3653    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3654    Error *err = NULL;
3655
3656    /* Devices should either use vmsd or the load/save methods */
3657    assert(!vdc->vmsd || !vdc->load);
3658
3659    if (vdc->realize != NULL) {
3660        vdc->realize(dev, &err);
3661        if (err != NULL) {
3662            error_propagate(errp, err);
3663            return;
3664        }
3665    }
3666
3667    virtio_bus_device_plugged(vdev, &err);
3668    if (err != NULL) {
3669        error_propagate(errp, err);
3670        vdc->unrealize(dev);
3671        return;
3672    }
3673
3674    vdev->listener.commit = virtio_memory_listener_commit;
3675    memory_listener_register(&vdev->listener, vdev->dma_as);
3676}
3677
3678static void virtio_device_unrealize(DeviceState *dev)
3679{
3680    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3681    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3682
3683    memory_listener_unregister(&vdev->listener);
3684    virtio_bus_device_unplugged(vdev);
3685
3686    if (vdc->unrealize != NULL) {
3687        vdc->unrealize(dev);
3688    }
3689
3690    g_free(vdev->bus_name);
3691    vdev->bus_name = NULL;
3692}
3693
3694static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3695{
3696    int i;
3697    if (!vdev->vq) {
3698        return;
3699    }
3700
3701    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3702        if (vdev->vq[i].vring.num == 0) {
3703            break;
3704        }
3705        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3706    }
3707    g_free(vdev->vq);
3708}
3709
3710static void virtio_device_instance_finalize(Object *obj)
3711{
3712    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3713
3714    virtio_device_free_virtqueues(vdev);
3715
3716    g_free(vdev->config);
3717    g_free(vdev->vector_queues);
3718}
3719
3720static Property virtio_properties[] = {
3721    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3722    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3723    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3724    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3725                     disable_legacy_check, false),
3726    DEFINE_PROP_END_OF_LIST(),
3727};
3728
3729static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3730{
3731    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3732    int i, n, r, err;
3733
3734    memory_region_transaction_begin();
3735    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3736        VirtQueue *vq = &vdev->vq[n];
3737        if (!virtio_queue_get_num(vdev, n)) {
3738            continue;
3739        }
3740        r = virtio_bus_set_host_notifier(qbus, n, true);
3741        if (r < 0) {
3742            err = r;
3743            goto assign_error;
3744        }
3745        event_notifier_set_handler(&vq->host_notifier,
3746                                   virtio_queue_host_notifier_read);
3747    }
3748
3749    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3750        /* Kick right away to begin processing requests already in vring */
3751        VirtQueue *vq = &vdev->vq[n];
3752        if (!vq->vring.num) {
3753            continue;
3754        }
3755        event_notifier_set(&vq->host_notifier);
3756    }
3757    memory_region_transaction_commit();
3758    return 0;
3759
3760assign_error:
3761    i = n; /* save n for a second iteration after transaction is committed. */
3762    while (--n >= 0) {
3763        VirtQueue *vq = &vdev->vq[n];
3764        if (!virtio_queue_get_num(vdev, n)) {
3765            continue;
3766        }
3767
3768        event_notifier_set_handler(&vq->host_notifier, NULL);
3769        r = virtio_bus_set_host_notifier(qbus, n, false);
3770        assert(r >= 0);
3771    }
3772    memory_region_transaction_commit();
3773
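    /*
     * Cleaning up a notifier closes its fd, so it is deferred until the
     * deassignment above has actually taken effect, i.e. after the memory
     * transaction has committed.
     */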
3774    while (--i >= 0) {
3775        if (!virtio_queue_get_num(vdev, i)) {
3776            continue;
3777        }
3778        virtio_bus_cleanup_host_notifier(qbus, i);
3779    }
3780    return err;
3781}
3782
3783int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3784{
3785    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3786    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3787
3788    return virtio_bus_start_ioeventfd(vbus);
3789}
3790
3791static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3792{
3793    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3794    int n, r;
3795
3796    memory_region_transaction_begin();
3797    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3798        VirtQueue *vq = &vdev->vq[n];
3799
3800        if (!virtio_queue_get_num(vdev, n)) {
3801            continue;
3802        }
3803        event_notifier_set_handler(&vq->host_notifier, NULL);
3804        r = virtio_bus_set_host_notifier(qbus, n, false);
3805        assert(r >= 0);
3806    }
3807    memory_region_transaction_commit();
3808
3809    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3810        if (!virtio_queue_get_num(vdev, n)) {
3811            continue;
3812        }
3813        virtio_bus_cleanup_host_notifier(qbus, n);
3814    }
3815}
3816
3817int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3818{
3819    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3820    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3821
3822    return virtio_bus_grab_ioeventfd(vbus);
3823}
3824
3825void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3826{
3827    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3828    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3829
3830    virtio_bus_release_ioeventfd(vbus);
3831}
3832
3833static void virtio_device_class_init(ObjectClass *klass, void *data)
3834{
3835    /* Set the defaults shared by all virtio devices here. */
3836    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3837    DeviceClass *dc = DEVICE_CLASS(klass);
3838
3839    dc->realize = virtio_device_realize;
3840    dc->unrealize = virtio_device_unrealize;
3841    dc->bus_type = TYPE_VIRTIO_BUS;
3842    device_class_set_props(dc, virtio_properties);
3843    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3844    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3845
3846    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3847}
3848
3849bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3850{
3851    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3852    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3853
3854    return virtio_bus_ioeventfd_enabled(vbus);
3855}
3856
3857static const TypeInfo virtio_device_info = {
3858    .name = TYPE_VIRTIO_DEVICE,
3859    .parent = TYPE_DEVICE,
3860    .instance_size = sizeof(VirtIODevice),
3861    .class_init = virtio_device_class_init,
3862    .instance_finalize = virtio_device_instance_finalize,
3863    .abstract = true,
3864    .class_size = sizeof(VirtioDeviceClass),
3865};
3866
3867static void virtio_register_types(void)
3868{
3869    type_register_static(&virtio_device_info);
3870}
3871
3872type_init(virtio_register_types)
3873