qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "cpu.h"
  17#include "trace.h"
  18#include "qemu/error-report.h"
  19#include "qemu/log.h"
  20#include "qemu/main-loop.h"
  21#include "qemu/module.h"
  22#include "hw/virtio/virtio.h"
  23#include "migration/qemu-file-types.h"
  24#include "qemu/atomic.h"
  25#include "hw/virtio/virtio-bus.h"
  26#include "hw/qdev-properties.h"
  27#include "hw/virtio/virtio-access.h"
  28#include "sysemu/dma.h"
  29#include "sysemu/runstate.h"
  30#include "standard-headers/linux/virtio_ids.h"
  31
  32/*
   33 * The alignment to use between the consumer and producer parts of the vring.
   34 * We use the x86 page size. This is the default, used by transports like PCI
  35 * which don't provide a means for the guest to tell the host the alignment.
  36 */
  37#define VIRTIO_PCI_VRING_ALIGN         4096
  38
  39typedef struct VRingDesc
  40{
  41    uint64_t addr;
  42    uint32_t len;
  43    uint16_t flags;
  44    uint16_t next;
  45} VRingDesc;
  46
  47typedef struct VRingPackedDesc {
  48    uint64_t addr;
  49    uint32_t len;
  50    uint16_t id;
  51    uint16_t flags;
  52} VRingPackedDesc;
  53
  54typedef struct VRingAvail
  55{
  56    uint16_t flags;
  57    uint16_t idx;
  58    uint16_t ring[];
  59} VRingAvail;
  60
  61typedef struct VRingUsedElem
  62{
  63    uint32_t id;
  64    uint32_t len;
  65} VRingUsedElem;
  66
  67typedef struct VRingUsed
  68{
  69    uint16_t flags;
  70    uint16_t idx;
  71    VRingUsedElem ring[];
  72} VRingUsed;
  73
  74typedef struct VRingMemoryRegionCaches {
  75    struct rcu_head rcu;
  76    MemoryRegionCache desc;
  77    MemoryRegionCache avail;
  78    MemoryRegionCache used;
  79} VRingMemoryRegionCaches;
  80
  81typedef struct VRing
  82{
  83    unsigned int num;
  84    unsigned int num_default;
  85    unsigned int align;
  86    hwaddr desc;
  87    hwaddr avail;
  88    hwaddr used;
  89    VRingMemoryRegionCaches *caches;
  90} VRing;
  91
  92typedef struct VRingPackedDescEvent {
  93    uint16_t off_wrap;
  94    uint16_t flags;
   95} VRingPackedDescEvent;
  96
  97struct VirtQueue
  98{
  99    VRing vring;
 100    VirtQueueElement *used_elems;
 101
 102    /* Next head to pop */
 103    uint16_t last_avail_idx;
 104    bool last_avail_wrap_counter;
 105
 106    /* Last avail_idx read from VQ. */
 107    uint16_t shadow_avail_idx;
 108    bool shadow_avail_wrap_counter;
 109
 110    uint16_t used_idx;
 111    bool used_wrap_counter;
 112
 113    /* Last used index value we have signalled on */
 114    uint16_t signalled_used;
 115
  116    /* Whether signalled_used is valid */
 117    bool signalled_used_valid;
 118
 119    /* Notification enabled? */
 120    bool notification;
 121
 122    uint16_t queue_index;
 123
 124    unsigned int inuse;
 125
 126    uint16_t vector;
 127    VirtIOHandleOutput handle_output;
 128    VirtIODevice *vdev;
 129    EventNotifier guest_notifier;
 130    EventNotifier host_notifier;
 131    bool host_notifier_enabled;
 132    QLIST_ENTRY(VirtQueue) node;
 133};
 134
 135const char *virtio_device_names[] = {
 136    [VIRTIO_ID_NET] = "virtio-net",
 137    [VIRTIO_ID_BLOCK] = "virtio-blk",
 138    [VIRTIO_ID_CONSOLE] = "virtio-serial",
 139    [VIRTIO_ID_RNG] = "virtio-rng",
 140    [VIRTIO_ID_BALLOON] = "virtio-balloon",
 141    [VIRTIO_ID_IOMEM] = "virtio-iomem",
 142    [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
 143    [VIRTIO_ID_SCSI] = "virtio-scsi",
 144    [VIRTIO_ID_9P] = "virtio-9p",
 145    [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
 146    [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
 147    [VIRTIO_ID_CAIF] = "virtio-caif",
 148    [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
 149    [VIRTIO_ID_GPU] = "virtio-gpu",
 150    [VIRTIO_ID_CLOCK] = "virtio-clk",
 151    [VIRTIO_ID_INPUT] = "virtio-input",
 152    [VIRTIO_ID_VSOCK] = "vhost-vsock",
 153    [VIRTIO_ID_CRYPTO] = "virtio-crypto",
 154    [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
 155    [VIRTIO_ID_PSTORE] = "virtio-pstore",
 156    [VIRTIO_ID_IOMMU] = "virtio-iommu",
 157    [VIRTIO_ID_MEM] = "virtio-mem",
 158    [VIRTIO_ID_SOUND] = "virtio-sound",
 159    [VIRTIO_ID_FS] = "virtio-user-fs",
 160    [VIRTIO_ID_PMEM] = "virtio-pmem",
 161    [VIRTIO_ID_RPMB] = "virtio-rpmb",
 162    [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
 163    [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
 164    [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
 165    [VIRTIO_ID_SCMI] = "virtio-scmi",
 166    [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
 167    [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
 168    [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
 169    [VIRTIO_ID_CAN] = "virtio-can",
 170    [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
 171    [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
 172    [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
 173    [VIRTIO_ID_BT] = "virtio-bluetooth",
 174    [VIRTIO_ID_GPIO] = "virtio-gpio"
 175};
 176
 177static const char *virtio_id_to_name(uint16_t device_id)
 178{
 179    assert(device_id < G_N_ELEMENTS(virtio_device_names));
 180    const char *name = virtio_device_names[device_id];
 181    assert(name != NULL);
 182    return name;
 183}
 184
 185/* Called within call_rcu().  */
 186static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
 187{
 188    assert(caches != NULL);
 189    address_space_cache_destroy(&caches->desc);
 190    address_space_cache_destroy(&caches->avail);
 191    address_space_cache_destroy(&caches->used);
 192    g_free(caches);
 193}
 194
 195static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
 196{
 197    VRingMemoryRegionCaches *caches;
 198
 199    caches = qatomic_read(&vq->vring.caches);
 200    qatomic_rcu_set(&vq->vring.caches, NULL);
 201    if (caches) {
 202        call_rcu(caches, virtio_free_region_cache, rcu);
 203    }
 204}
 205
 206static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 207{
 208    VirtQueue *vq = &vdev->vq[n];
 209    VRingMemoryRegionCaches *old = vq->vring.caches;
 210    VRingMemoryRegionCaches *new = NULL;
 211    hwaddr addr, size;
 212    int64_t len;
 213    bool packed;
 214
 215
 216    addr = vq->vring.desc;
 217    if (!addr) {
 218        goto out_no_cache;
 219    }
 220    new = g_new0(VRingMemoryRegionCaches, 1);
 221    size = virtio_queue_get_desc_size(vdev, n);
 222    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
 223                                   true : false;
 224    len = address_space_cache_init(&new->desc, vdev->dma_as,
 225                                   addr, size, packed);
 226    if (len < size) {
 227        virtio_error(vdev, "Cannot map desc");
 228        goto err_desc;
 229    }
 230
 231    size = virtio_queue_get_used_size(vdev, n);
 232    len = address_space_cache_init(&new->used, vdev->dma_as,
 233                                   vq->vring.used, size, true);
 234    if (len < size) {
 235        virtio_error(vdev, "Cannot map used");
 236        goto err_used;
 237    }
 238
 239    size = virtio_queue_get_avail_size(vdev, n);
 240    len = address_space_cache_init(&new->avail, vdev->dma_as,
 241                                   vq->vring.avail, size, false);
 242    if (len < size) {
 243        virtio_error(vdev, "Cannot map avail");
 244        goto err_avail;
 245    }
 246
 247    qatomic_rcu_set(&vq->vring.caches, new);
 248    if (old) {
 249        call_rcu(old, virtio_free_region_cache, rcu);
 250    }
 251    return;
 252
 253err_avail:
 254    address_space_cache_destroy(&new->avail);
 255err_used:
 256    address_space_cache_destroy(&new->used);
 257err_desc:
 258    address_space_cache_destroy(&new->desc);
 259out_no_cache:
 260    g_free(new);
 261    virtio_virtqueue_reset_region_cache(vq);
 262}
 263
 264/* virt queue functions */
 265void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 266{
 267    VRing *vring = &vdev->vq[n].vring;
 268
 269    if (!vring->num || !vring->desc || !vring->align) {
 270        /* not yet setup -> nothing to do */
 271        return;
 272    }
 273    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 274    vring->used = vring_align(vring->avail +
 275                              offsetof(VRingAvail, ring[vring->num]),
 276                              vring->align);
 277    virtio_init_region_cache(vdev, n);
 278}
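/*
 * Worked example (illustrative only): for a split ring with num = 256 and
 * the default 4096-byte alignment, the layout computed by
 * virtio_queue_update_rings() is
 *
 *   avail = desc + 256 * sizeof(VRingDesc)                      = desc + 4096
 *   used  = ALIGN(avail + offsetof(VRingAvail, ring[256]), 4096) = desc + 8192
 *
 * because offsetof(VRingAvail, ring[256]) is 2 + 2 + 256 * 2 = 516 bytes.
 */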
 279
 280/* Called within rcu_read_lock().  */
 281static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 282                                  MemoryRegionCache *cache, int i)
 283{
 284    address_space_read_cached(cache, i * sizeof(VRingDesc),
 285                              desc, sizeof(VRingDesc));
 286    virtio_tswap64s(vdev, &desc->addr);
 287    virtio_tswap32s(vdev, &desc->len);
 288    virtio_tswap16s(vdev, &desc->flags);
 289    virtio_tswap16s(vdev, &desc->next);
 290}
 291
 292static void vring_packed_event_read(VirtIODevice *vdev,
 293                                    MemoryRegionCache *cache,
 294                                    VRingPackedDescEvent *e)
 295{
 296    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
 297    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 298
 299    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
 300    /* Make sure flags is seen before off_wrap */
 301    smp_rmb();
 302    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
 303    virtio_tswap16s(vdev, &e->flags);
 304}
 305
 306static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 307                                        MemoryRegionCache *cache,
 308                                        uint16_t off_wrap)
 309{
 310    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 311
 312    virtio_stw_phys_cached(vdev, cache, off, off_wrap);
 313    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 314}
 315
 316static void vring_packed_flags_write(VirtIODevice *vdev,
 317                                     MemoryRegionCache *cache, uint16_t flags)
 318{
 319    hwaddr off = offsetof(VRingPackedDescEvent, flags);
 320
 321    virtio_stw_phys_cached(vdev, cache, off, flags);
 322    address_space_cache_invalidate(cache, off, sizeof(flags));
 323}
 324
 325/* Called within rcu_read_lock().  */
 326static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 327{
 328    return qatomic_rcu_read(&vq->vring.caches);
 329}
 330
 331/* Called within rcu_read_lock().  */
 332static inline uint16_t vring_avail_flags(VirtQueue *vq)
 333{
 334    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 335    hwaddr pa = offsetof(VRingAvail, flags);
 336
 337    if (!caches) {
 338        return 0;
 339    }
 340
 341    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 342}
 343
 344/* Called within rcu_read_lock().  */
 345static inline uint16_t vring_avail_idx(VirtQueue *vq)
 346{
 347    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 348    hwaddr pa = offsetof(VRingAvail, idx);
 349
 350    if (!caches) {
 351        return 0;
 352    }
 353
 354    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 355    return vq->shadow_avail_idx;
 356}
 357
 358/* Called within rcu_read_lock().  */
 359static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 360{
 361    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 362    hwaddr pa = offsetof(VRingAvail, ring[i]);
 363
 364    if (!caches) {
 365        return 0;
 366    }
 367
 368    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 369}
 370
 371/* Called within rcu_read_lock().  */
 372static inline uint16_t vring_get_used_event(VirtQueue *vq)
 373{
 374    return vring_avail_ring(vq, vq->vring.num);
 375}
 376
 377/* Called within rcu_read_lock().  */
 378static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 379                                    int i)
 380{
 381    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 382    hwaddr pa = offsetof(VRingUsed, ring[i]);
 383
 384    if (!caches) {
 385        return;
 386    }
 387
 388    virtio_tswap32s(vq->vdev, &uelem->id);
 389    virtio_tswap32s(vq->vdev, &uelem->len);
 390    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
 391    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
 392}
 393
 394/* Called within rcu_read_lock().  */
 395static uint16_t vring_used_idx(VirtQueue *vq)
 396{
 397    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 398    hwaddr pa = offsetof(VRingUsed, idx);
 399
 400    if (!caches) {
 401        return 0;
 402    }
 403
 404    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 405}
 406
 407/* Called within rcu_read_lock().  */
 408static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 409{
 410    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 411    hwaddr pa = offsetof(VRingUsed, idx);
 412
 413    if (caches) {
 414        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 415        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 416    }
 417
 418    vq->used_idx = val;
 419}
 420
 421/* Called within rcu_read_lock().  */
 422static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 423{
 424    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 425    VirtIODevice *vdev = vq->vdev;
 426    hwaddr pa = offsetof(VRingUsed, flags);
 427    uint16_t flags;
 428
 429    if (!caches) {
 430        return;
 431    }
 432
 433    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 434    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
 435    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 436}
 437
 438/* Called within rcu_read_lock().  */
 439static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 440{
 441    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 442    VirtIODevice *vdev = vq->vdev;
 443    hwaddr pa = offsetof(VRingUsed, flags);
 444    uint16_t flags;
 445
 446    if (!caches) {
 447        return;
 448    }
 449
 450    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 451    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
 452    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 453}
 454
 455/* Called within rcu_read_lock().  */
 456static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 457{
 458    VRingMemoryRegionCaches *caches;
 459    hwaddr pa;
 460    if (!vq->notification) {
 461        return;
 462    }
 463
 464    caches = vring_get_region_caches(vq);
 465    if (!caches) {
 466        return;
 467    }
 468
 469    pa = offsetof(VRingUsed, ring[vq->vring.num]);
 470    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 471    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 472}
 473
 474static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
 475{
 476    RCU_READ_LOCK_GUARD();
 477
 478    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 479        vring_set_avail_event(vq, vring_avail_idx(vq));
 480    } else if (enable) {
 481        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 482    } else {
 483        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 484    }
 485    if (enable) {
 486        /* Expose avail event/used flags before caller checks the avail idx. */
 487        smp_mb();
 488    }
 489}
 490
 491static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
 492{
 493    uint16_t off_wrap;
 494    VRingPackedDescEvent e;
 495    VRingMemoryRegionCaches *caches;
 496
 497    RCU_READ_LOCK_GUARD();
 498    caches = vring_get_region_caches(vq);
 499    if (!caches) {
 500        return;
 501    }
 502
 503    vring_packed_event_read(vq->vdev, &caches->used, &e);
 504
 505    if (!enable) {
 506        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
 507    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
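        /*
         * The packed ring event structure stores the descriptor offset in
         * bits 0..14 of off_wrap and the wrap counter in bit 15, which is
         * why the wrap counter is shifted into bit 15 below.
         */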
 508        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
 509        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
  510        /* Make sure off_wrap is written before flags */
 511        smp_wmb();
 512        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
 513    } else {
 514        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
 515    }
 516
 517    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
 518    if (enable) {
 519        /* Expose avail event/used flags before caller checks the avail idx. */
 520        smp_mb();
 521    }
 522}
 523
 524bool virtio_queue_get_notification(VirtQueue *vq)
 525{
 526    return vq->notification;
 527}
 528
 529void virtio_queue_set_notification(VirtQueue *vq, int enable)
 530{
 531    vq->notification = enable;
 532
 533    if (!vq->vring.desc) {
 534        return;
 535    }
 536
 537    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 538        virtio_queue_packed_set_notification(vq, enable);
 539    } else {
 540        virtio_queue_split_set_notification(vq, enable);
 541    }
 542}
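/*
 * Illustrative device-side pattern (a sketch, not part of this file's
 * logic): queue handlers commonly disable notifications while draining the
 * ring and re-check after re-enabling them, to avoid missing buffers the
 * guest added in between:
 *
 *     do {
 *         virtio_queue_set_notification(vq, 0);
 *         while ((elem = virtqueue_pop(vq, sizeof(*elem)))) {
 *             ... process, virtqueue_push(), g_free(elem) ...
 *         }
 *         virtio_queue_set_notification(vq, 1);
 *     } while (!virtio_queue_empty(vq));
 */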
 543
 544int virtio_queue_ready(VirtQueue *vq)
 545{
 546    return vq->vring.avail != 0;
 547}
 548
 549static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 550                                         uint16_t *flags,
 551                                         MemoryRegionCache *cache,
 552                                         int i)
 553{
 554    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 555
 556    *flags = virtio_lduw_phys_cached(vdev, cache, off);
 557}
 558
 559static void vring_packed_desc_read(VirtIODevice *vdev,
 560                                   VRingPackedDesc *desc,
 561                                   MemoryRegionCache *cache,
 562                                   int i, bool strict_order)
 563{
 564    hwaddr off = i * sizeof(VRingPackedDesc);
 565
 566    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
 567
 568    if (strict_order) {
  569        /* Make sure flags is read before the rest of the fields. */
 570        smp_rmb();
 571    }
 572
 573    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
 574                              &desc->addr, sizeof(desc->addr));
 575    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
 576                              &desc->id, sizeof(desc->id));
 577    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
 578                              &desc->len, sizeof(desc->len));
 579    virtio_tswap64s(vdev, &desc->addr);
 580    virtio_tswap16s(vdev, &desc->id);
 581    virtio_tswap32s(vdev, &desc->len);
 582}
 583
 584static void vring_packed_desc_write_data(VirtIODevice *vdev,
 585                                         VRingPackedDesc *desc,
 586                                         MemoryRegionCache *cache,
 587                                         int i)
 588{
 589    hwaddr off_id = i * sizeof(VRingPackedDesc) +
 590                    offsetof(VRingPackedDesc, id);
 591    hwaddr off_len = i * sizeof(VRingPackedDesc) +
 592                    offsetof(VRingPackedDesc, len);
 593
 594    virtio_tswap32s(vdev, &desc->len);
 595    virtio_tswap16s(vdev, &desc->id);
 596    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
 597    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
 598    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
 599    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
 600}
 601
 602static void vring_packed_desc_write_flags(VirtIODevice *vdev,
 603                                          VRingPackedDesc *desc,
 604                                          MemoryRegionCache *cache,
 605                                          int i)
 606{
 607    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 608
 609    virtio_stw_phys_cached(vdev, cache, off, desc->flags);
 610    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
 611}
 612
 613static void vring_packed_desc_write(VirtIODevice *vdev,
 614                                    VRingPackedDesc *desc,
 615                                    MemoryRegionCache *cache,
 616                                    int i, bool strict_order)
 617{
 618    vring_packed_desc_write_data(vdev, desc, cache, i);
 619    if (strict_order) {
  620        /* Make sure the data is written before the flags. */
 621        smp_wmb();
 622    }
 623    vring_packed_desc_write_flags(vdev, desc, cache, i);
 624}
 625
 626static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
 627{
 628    bool avail, used;
 629
 630    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
 631    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
 632    return (avail != used) && (avail == wrap_counter);
 633}
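/*
 * Packed ring availability: the driver marks a descriptor available by
 * setting the AVAIL bit to its ring wrap counter and the USED bit to the
 * inverse; the device marks it used by writing both bits equal to its own
 * used wrap counter.  A descriptor is therefore available to us exactly
 * when AVAIL != USED and AVAIL matches the wrap counter we expect.
 */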
 634
 635/* Fetch avail_idx from VQ memory only when we really need to know if
  636 * the guest has added some buffers.
 637 * Called within rcu_read_lock().  */
 638static int virtio_queue_empty_rcu(VirtQueue *vq)
 639{
 640    if (virtio_device_disabled(vq->vdev)) {
 641        return 1;
 642    }
 643
 644    if (unlikely(!vq->vring.avail)) {
 645        return 1;
 646    }
 647
 648    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 649        return 0;
 650    }
 651
 652    return vring_avail_idx(vq) == vq->last_avail_idx;
 653}
 654
 655static int virtio_queue_split_empty(VirtQueue *vq)
 656{
 657    bool empty;
 658
 659    if (virtio_device_disabled(vq->vdev)) {
 660        return 1;
 661    }
 662
 663    if (unlikely(!vq->vring.avail)) {
 664        return 1;
 665    }
 666
 667    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 668        return 0;
 669    }
 670
 671    RCU_READ_LOCK_GUARD();
 672    empty = vring_avail_idx(vq) == vq->last_avail_idx;
 673    return empty;
 674}
 675
 676/* Called within rcu_read_lock().  */
 677static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
 678{
 679    struct VRingPackedDesc desc;
 680    VRingMemoryRegionCaches *cache;
 681
 682    if (unlikely(!vq->vring.desc)) {
 683        return 1;
 684    }
 685
 686    cache = vring_get_region_caches(vq);
 687    if (!cache) {
 688        return 1;
 689    }
 690
 691    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
 692                                 vq->last_avail_idx);
 693
 694    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
 695}
 696
 697static int virtio_queue_packed_empty(VirtQueue *vq)
 698{
 699    RCU_READ_LOCK_GUARD();
 700    return virtio_queue_packed_empty_rcu(vq);
 701}
 702
 703int virtio_queue_empty(VirtQueue *vq)
 704{
 705    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 706        return virtio_queue_packed_empty(vq);
 707    } else {
 708        return virtio_queue_split_empty(vq);
 709    }
 710}
 711
 712static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
 713                               unsigned int len)
 714{
 715    AddressSpace *dma_as = vq->vdev->dma_as;
 716    unsigned int offset;
 717    int i;
 718
 719    offset = 0;
 720    for (i = 0; i < elem->in_num; i++) {
 721        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 722
 723        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
 724                         elem->in_sg[i].iov_len,
 725                         DMA_DIRECTION_FROM_DEVICE, size);
 726
 727        offset += size;
 728    }
 729
 730    for (i = 0; i < elem->out_num; i++)
 731        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
 732                         elem->out_sg[i].iov_len,
 733                         DMA_DIRECTION_TO_DEVICE,
 734                         elem->out_sg[i].iov_len);
 735}
 736
 737/* virtqueue_detach_element:
 738 * @vq: The #VirtQueue
 739 * @elem: The #VirtQueueElement
 740 * @len: number of bytes written
 741 *
 742 * Detach the element from the virtqueue.  This function is suitable for device
 743 * reset or other situations where a #VirtQueueElement is simply freed and will
 744 * not be pushed or discarded.
 745 */
 746void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
 747                              unsigned int len)
 748{
 749    vq->inuse -= elem->ndescs;
 750    virtqueue_unmap_sg(vq, elem, len);
 751}
 752
 753static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
 754{
 755    vq->last_avail_idx -= num;
 756}
 757
 758static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
 759{
 760    if (vq->last_avail_idx < num) {
 761        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
 762        vq->last_avail_wrap_counter ^= 1;
 763    } else {
 764        vq->last_avail_idx -= num;
 765    }
 766}
 767
 768/* virtqueue_unpop:
 769 * @vq: The #VirtQueue
 770 * @elem: The #VirtQueueElement
 771 * @len: number of bytes written
 772 *
 773 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 774 * call to virtqueue_pop() will refetch the element.
 775 */
 776void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
 777                     unsigned int len)
 778{
 779
 780    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 781        virtqueue_packed_rewind(vq, 1);
 782    } else {
 783        virtqueue_split_rewind(vq, 1);
 784    }
 785
 786    virtqueue_detach_element(vq, elem, len);
 787}
 788
 789/* virtqueue_rewind:
 790 * @vq: The #VirtQueue
 791 * @num: Number of elements to push back
 792 *
 793 * Pretend that elements weren't popped from the virtqueue.  The next
 794 * virtqueue_pop() will refetch the oldest element.
 795 *
 796 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 797 *
 798 * Returns: true on success, false if @num is greater than the number of in use
 799 * elements.
 800 */
 801bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 802{
 803    if (num > vq->inuse) {
 804        return false;
 805    }
 806
 807    vq->inuse -= num;
 808    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 809        virtqueue_packed_rewind(vq, num);
 810    } else {
 811        virtqueue_split_rewind(vq, num);
 812    }
 813    return true;
 814}
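/*
 * Example (illustrative): a backend that stops with N requests still in
 * flight can hand them back to the guest-visible ring with
 * virtqueue_rewind(vq, N); the next virtqueue_pop() will then refetch the
 * oldest of them.
 */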
 815
 816static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 817                    unsigned int len, unsigned int idx)
 818{
 819    VRingUsedElem uelem;
 820
 821    if (unlikely(!vq->vring.used)) {
 822        return;
 823    }
 824
 825    idx = (idx + vq->used_idx) % vq->vring.num;
 826
 827    uelem.id = elem->index;
 828    uelem.len = len;
 829    vring_used_write(vq, &uelem, idx);
 830}
 831
 832static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
 833                                  unsigned int len, unsigned int idx)
 834{
 835    vq->used_elems[idx].index = elem->index;
 836    vq->used_elems[idx].len = len;
 837    vq->used_elems[idx].ndescs = elem->ndescs;
 838}
 839
 840static void virtqueue_packed_fill_desc(VirtQueue *vq,
 841                                       const VirtQueueElement *elem,
 842                                       unsigned int idx,
 843                                       bool strict_order)
 844{
 845    uint16_t head;
 846    VRingMemoryRegionCaches *caches;
 847    VRingPackedDesc desc = {
 848        .id = elem->index,
 849        .len = elem->len,
 850    };
 851    bool wrap_counter = vq->used_wrap_counter;
 852
 853    if (unlikely(!vq->vring.desc)) {
 854        return;
 855    }
 856
 857    head = vq->used_idx + idx;
 858    if (head >= vq->vring.num) {
 859        head -= vq->vring.num;
 860        wrap_counter ^= 1;
 861    }
 862    if (wrap_counter) {
 863        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
 864        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
 865    } else {
 866        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
 867        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
 868    }
 869
 870    caches = vring_get_region_caches(vq);
 871    if (!caches) {
 872        return;
 873    }
 874
 875    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
 876}
 877
 878/* Called within rcu_read_lock().  */
 879void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 880                    unsigned int len, unsigned int idx)
 881{
 882    trace_virtqueue_fill(vq, elem, len, idx);
 883
 884    virtqueue_unmap_sg(vq, elem, len);
 885
 886    if (virtio_device_disabled(vq->vdev)) {
 887        return;
 888    }
 889
 890    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 891        virtqueue_packed_fill(vq, elem, len, idx);
 892    } else {
 893        virtqueue_split_fill(vq, elem, len, idx);
 894    }
 895}
 896
 897/* Called within rcu_read_lock().  */
 898static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
 899{
 900    uint16_t old, new;
 901
 902    if (unlikely(!vq->vring.used)) {
 903        return;
 904    }
 905
 906    /* Make sure buffer is written before we update index. */
 907    smp_wmb();
 908    trace_virtqueue_flush(vq, count);
 909    old = vq->used_idx;
 910    new = old + count;
 911    vring_used_idx_set(vq, new);
 912    vq->inuse -= count;
 913    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
 914        vq->signalled_used_valid = false;
 915}
 916
 917static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
 918{
 919    unsigned int i, ndescs = 0;
 920
 921    if (unlikely(!vq->vring.desc)) {
 922        return;
 923    }
 924
 925    for (i = 1; i < count; i++) {
 926        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
 927        ndescs += vq->used_elems[i].ndescs;
 928    }
 929    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
 930    ndescs += vq->used_elems[0].ndescs;
 931
 932    vq->inuse -= ndescs;
 933    vq->used_idx += ndescs;
 934    if (vq->used_idx >= vq->vring.num) {
 935        vq->used_idx -= vq->vring.num;
 936        vq->used_wrap_counter ^= 1;
 937        vq->signalled_used_valid = false;
 938    }
 939}
 940
 941void virtqueue_flush(VirtQueue *vq, unsigned int count)
 942{
 943    if (virtio_device_disabled(vq->vdev)) {
 944        vq->inuse -= count;
 945        return;
 946    }
 947
 948    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 949        virtqueue_packed_flush(vq, count);
 950    } else {
 951        virtqueue_split_flush(vq, count);
 952    }
 953}
 954
 955void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 956                    unsigned int len)
 957{
 958    RCU_READ_LOCK_GUARD();
 959    virtqueue_fill(vq, elem, len, 0);
 960    virtqueue_flush(vq, 1);
 961}
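/*
 * A minimal sketch of the usual completion path (process_request is a
 * hypothetical device helper, not part of this file):
 *
 *     VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (elem) {
 *         size_t written = process_request(elem);
 *         virtqueue_push(vq, elem, written);
 *         virtio_notify(vdev, vq);
 *         g_free(elem);
 *     }
 */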
 962
 963/* Called within rcu_read_lock().  */
 964static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 965{
 966    uint16_t num_heads = vring_avail_idx(vq) - idx;
 967
 968    /* Check it isn't doing very strange things with descriptor numbers. */
 969    if (num_heads > vq->vring.num) {
 970        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
 971                     idx, vq->shadow_avail_idx);
 972        return -EINVAL;
 973    }
 974    /* On success, callers read a descriptor at vq->last_avail_idx.
 975     * Make sure descriptor read does not bypass avail index read. */
 976    if (num_heads) {
 977        smp_rmb();
 978    }
 979
 980    return num_heads;
 981}
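/*
 * The subtraction in virtqueue_num_heads() relies on 16-bit wraparound:
 * e.g. with last_avail_idx == 0xfffe and an avail idx of 0x0003,
 * num_heads is (uint16_t)(0x0003 - 0xfffe) == 5.
 */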
 982
 983/* Called within rcu_read_lock().  */
 984static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
 985                               unsigned int *head)
 986{
 987    /* Grab the next descriptor number they're advertising, and increment
 988     * the index we've seen. */
 989    *head = vring_avail_ring(vq, idx % vq->vring.num);
 990
 991    /* If their number is silly, that's a fatal mistake. */
 992    if (*head >= vq->vring.num) {
 993        virtio_error(vq->vdev, "Guest says index %u is available", *head);
 994        return false;
 995    }
 996
 997    return true;
 998}
 999
1000enum {
1001    VIRTQUEUE_READ_DESC_ERROR = -1,
1002    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
1003    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
1004};
1005
1006static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1007                                          MemoryRegionCache *desc_cache,
1008                                          unsigned int max, unsigned int *next)
1009{
1010    /* If this descriptor says it doesn't chain, we're done. */
1011    if (!(desc->flags & VRING_DESC_F_NEXT)) {
1012        return VIRTQUEUE_READ_DESC_DONE;
1013    }
1014
1015    /* Check they're not leading us off end of descriptors. */
1016    *next = desc->next;
1017    /* Make sure compiler knows to grab that: we don't want it changing! */
1018    smp_wmb();
1019
1020    if (*next >= max) {
1021        virtio_error(vdev, "Desc next is %u", *next);
1022        return VIRTQUEUE_READ_DESC_ERROR;
1023    }
1024
1025    vring_split_desc_read(vdev, desc, desc_cache, *next);
1026    return VIRTQUEUE_READ_DESC_MORE;
1027}
1028
1029/* Called within rcu_read_lock().  */
1030static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1031                            unsigned int *in_bytes, unsigned int *out_bytes,
1032                            unsigned max_in_bytes, unsigned max_out_bytes,
1033                            VRingMemoryRegionCaches *caches)
1034{
1035    VirtIODevice *vdev = vq->vdev;
1036    unsigned int max, idx;
1037    unsigned int total_bufs, in_total, out_total;
1038    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1039    int64_t len = 0;
1040    int rc;
1041
1042    idx = vq->last_avail_idx;
1043    total_bufs = in_total = out_total = 0;
1044
1045    max = vq->vring.num;
1046
1047    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1048        MemoryRegionCache *desc_cache = &caches->desc;
1049        unsigned int num_bufs;
1050        VRingDesc desc;
1051        unsigned int i;
1052
1053        num_bufs = total_bufs;
1054
1055        if (!virtqueue_get_head(vq, idx++, &i)) {
1056            goto err;
1057        }
1058
1059        vring_split_desc_read(vdev, &desc, desc_cache, i);
1060
1061        if (desc.flags & VRING_DESC_F_INDIRECT) {
1062            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1063                virtio_error(vdev, "Invalid size for indirect buffer table");
1064                goto err;
1065            }
1066
1067            /* If we've got too many, that implies a descriptor loop. */
1068            if (num_bufs >= max) {
1069                virtio_error(vdev, "Looped descriptor");
1070                goto err;
1071            }
1072
1073            /* loop over the indirect descriptor table */
1074            len = address_space_cache_init(&indirect_desc_cache,
1075                                           vdev->dma_as,
1076                                           desc.addr, desc.len, false);
1077            desc_cache = &indirect_desc_cache;
1078            if (len < desc.len) {
1079                virtio_error(vdev, "Cannot map indirect buffer");
1080                goto err;
1081            }
1082
1083            max = desc.len / sizeof(VRingDesc);
1084            num_bufs = i = 0;
1085            vring_split_desc_read(vdev, &desc, desc_cache, i);
1086        }
1087
1088        do {
1089            /* If we've got too many, that implies a descriptor loop. */
1090            if (++num_bufs > max) {
1091                virtio_error(vdev, "Looped descriptor");
1092                goto err;
1093            }
1094
1095            if (desc.flags & VRING_DESC_F_WRITE) {
1096                in_total += desc.len;
1097            } else {
1098                out_total += desc.len;
1099            }
1100            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1101                goto done;
1102            }
1103
1104            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1105        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1106
1107        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1108            goto err;
1109        }
1110
1111        if (desc_cache == &indirect_desc_cache) {
1112            address_space_cache_destroy(&indirect_desc_cache);
1113            total_bufs++;
1114        } else {
1115            total_bufs = num_bufs;
1116        }
1117    }
1118
1119    if (rc < 0) {
1120        goto err;
1121    }
1122
1123done:
1124    address_space_cache_destroy(&indirect_desc_cache);
1125    if (in_bytes) {
1126        *in_bytes = in_total;
1127    }
1128    if (out_bytes) {
1129        *out_bytes = out_total;
1130    }
1131    return;
1132
1133err:
1134    in_total = out_total = 0;
1135    goto done;
1136}
1137
1138static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1139                                           VRingPackedDesc *desc,
1140                                           MemoryRegionCache
1141                                           *desc_cache,
1142                                           unsigned int max,
1143                                           unsigned int *next,
1144                                           bool indirect)
1145{
1146    /* If this descriptor says it doesn't chain, we're done. */
1147    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1148        return VIRTQUEUE_READ_DESC_DONE;
1149    }
1150
1151    ++*next;
1152    if (*next == max) {
1153        if (indirect) {
1154            return VIRTQUEUE_READ_DESC_DONE;
1155        } else {
1156            (*next) -= vq->vring.num;
1157        }
1158    }
1159
1160    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1161    return VIRTQUEUE_READ_DESC_MORE;
1162}
1163
1164/* Called within rcu_read_lock().  */
1165static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1166                                             unsigned int *in_bytes,
1167                                             unsigned int *out_bytes,
1168                                             unsigned max_in_bytes,
1169                                             unsigned max_out_bytes,
1170                                             VRingMemoryRegionCaches *caches)
1171{
1172    VirtIODevice *vdev = vq->vdev;
1173    unsigned int max, idx;
1174    unsigned int total_bufs, in_total, out_total;
1175    MemoryRegionCache *desc_cache;
1176    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1177    int64_t len = 0;
1178    VRingPackedDesc desc;
1179    bool wrap_counter;
1180
1181    idx = vq->last_avail_idx;
1182    wrap_counter = vq->last_avail_wrap_counter;
1183    total_bufs = in_total = out_total = 0;
1184
1185    max = vq->vring.num;
1186
1187    for (;;) {
1188        unsigned int num_bufs = total_bufs;
1189        unsigned int i = idx;
1190        int rc;
1191
1192        desc_cache = &caches->desc;
1193        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1194        if (!is_desc_avail(desc.flags, wrap_counter)) {
1195            break;
1196        }
1197
1198        if (desc.flags & VRING_DESC_F_INDIRECT) {
1199            if (desc.len % sizeof(VRingPackedDesc)) {
1200                virtio_error(vdev, "Invalid size for indirect buffer table");
1201                goto err;
1202            }
1203
1204            /* If we've got too many, that implies a descriptor loop. */
1205            if (num_bufs >= max) {
1206                virtio_error(vdev, "Looped descriptor");
1207                goto err;
1208            }
1209
1210            /* loop over the indirect descriptor table */
1211            len = address_space_cache_init(&indirect_desc_cache,
1212                                           vdev->dma_as,
1213                                           desc.addr, desc.len, false);
1214            desc_cache = &indirect_desc_cache;
1215            if (len < desc.len) {
1216                virtio_error(vdev, "Cannot map indirect buffer");
1217                goto err;
1218            }
1219
1220            max = desc.len / sizeof(VRingPackedDesc);
1221            num_bufs = i = 0;
1222            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1223        }
1224
1225        do {
1226            /* If we've got too many, that implies a descriptor loop. */
1227            if (++num_bufs > max) {
1228                virtio_error(vdev, "Looped descriptor");
1229                goto err;
1230            }
1231
1232            if (desc.flags & VRING_DESC_F_WRITE) {
1233                in_total += desc.len;
1234            } else {
1235                out_total += desc.len;
1236            }
1237            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1238                goto done;
1239            }
1240
1241            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1242                                                 &i, desc_cache ==
1243                                                 &indirect_desc_cache);
1244        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1245
1246        if (desc_cache == &indirect_desc_cache) {
1247            address_space_cache_destroy(&indirect_desc_cache);
1248            total_bufs++;
1249            idx++;
1250        } else {
1251            idx += num_bufs - total_bufs;
1252            total_bufs = num_bufs;
1253        }
1254
1255        if (idx >= vq->vring.num) {
1256            idx -= vq->vring.num;
1257            wrap_counter ^= 1;
1258        }
1259    }
1260
1261    /* Record the index and wrap counter for a kick we want */
1262    vq->shadow_avail_idx = idx;
1263    vq->shadow_avail_wrap_counter = wrap_counter;
1264done:
1265    address_space_cache_destroy(&indirect_desc_cache);
1266    if (in_bytes) {
1267        *in_bytes = in_total;
1268    }
1269    if (out_bytes) {
1270        *out_bytes = out_total;
1271    }
1272    return;
1273
1274err:
1275    in_total = out_total = 0;
1276    goto done;
1277}
1278
1279void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1280                               unsigned int *out_bytes,
1281                               unsigned max_in_bytes, unsigned max_out_bytes)
1282{
1283    uint16_t desc_size;
1284    VRingMemoryRegionCaches *caches;
1285
1286    RCU_READ_LOCK_GUARD();
1287
1288    if (unlikely(!vq->vring.desc)) {
1289        goto err;
1290    }
1291
1292    caches = vring_get_region_caches(vq);
1293    if (!caches) {
1294        goto err;
1295    }
1296
1297    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1298                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1299    if (caches->desc.len < vq->vring.num * desc_size) {
1300        virtio_error(vq->vdev, "Cannot map descriptor ring");
1301        goto err;
1302    }
1303
1304    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1305        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1306                                         max_in_bytes, max_out_bytes,
1307                                         caches);
1308    } else {
1309        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1310                                        max_in_bytes, max_out_bytes,
1311                                        caches);
1312    }
1313
1314    return;
1315err:
1316    if (in_bytes) {
1317        *in_bytes = 0;
1318    }
1319    if (out_bytes) {
1320        *out_bytes = 0;
1321    }
1322}
1323
1324int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1325                          unsigned int out_bytes)
1326{
1327    unsigned int in_total, out_total;
1328
1329    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1330    return in_bytes <= in_total && out_bytes <= out_total;
1331}
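/*
 * Illustrative use: a device that must read a 16-byte request header and
 * write back up to 512 bytes can check virtqueue_avail_bytes(vq, 512, 16)
 * before popping, instead of totalling the descriptors itself with
 * virtqueue_get_avail_bytes().
 */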
1332
1333static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1334                               hwaddr *addr, struct iovec *iov,
1335                               unsigned int max_num_sg, bool is_write,
1336                               hwaddr pa, size_t sz)
1337{
1338    bool ok = false;
1339    unsigned num_sg = *p_num_sg;
1340    assert(num_sg <= max_num_sg);
1341
1342    if (!sz) {
1343        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1344        goto out;
1345    }
1346
1347    while (sz) {
1348        hwaddr len = sz;
1349
1350        if (num_sg == max_num_sg) {
1351            virtio_error(vdev, "virtio: too many write descriptors in "
1352                               "indirect table");
1353            goto out;
1354        }
1355
1356        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1357                                              is_write ?
1358                                              DMA_DIRECTION_FROM_DEVICE :
1359                                              DMA_DIRECTION_TO_DEVICE,
1360                                              MEMTXATTRS_UNSPECIFIED);
1361        if (!iov[num_sg].iov_base) {
1362            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1363            goto out;
1364        }
1365
1366        iov[num_sg].iov_len = len;
1367        addr[num_sg] = pa;
1368
1369        sz -= len;
1370        pa += len;
1371        num_sg++;
1372    }
1373    ok = true;
1374
1375out:
1376    *p_num_sg = num_sg;
1377    return ok;
1378}
1379
1380/* Only used by error code paths before we have a VirtQueueElement (therefore
1381 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1382 * yet.
1383 */
1384static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1385                                    struct iovec *iov)
1386{
1387    unsigned int i;
1388
1389    for (i = 0; i < out_num + in_num; i++) {
1390        int is_write = i >= out_num;
1391
1392        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1393        iov++;
1394    }
1395}
1396
1397static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1398                                hwaddr *addr, unsigned int num_sg,
1399                                bool is_write)
1400{
1401    unsigned int i;
1402    hwaddr len;
1403
1404    for (i = 0; i < num_sg; i++) {
1405        len = sg[i].iov_len;
1406        sg[i].iov_base = dma_memory_map(vdev->dma_as,
1407                                        addr[i], &len, is_write ?
1408                                        DMA_DIRECTION_FROM_DEVICE :
1409                                        DMA_DIRECTION_TO_DEVICE,
1410                                        MEMTXATTRS_UNSPECIFIED);
1411        if (!sg[i].iov_base) {
1412            error_report("virtio: error trying to map MMIO memory");
1413            exit(1);
1414        }
1415        if (len != sg[i].iov_len) {
1416            error_report("virtio: unexpected memory split");
1417            exit(1);
1418        }
1419    }
1420}
1421
1422void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1423{
1424    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1425    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1426                                                                        false);
1427}
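/*
 * virtqueue_map() re-establishes the iovec mappings from the guest
 * addresses saved in in_addr[]/out_addr[], e.g. for an element that was
 * serialized and restored across migration.
 */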
1428
1429static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1430{
1431    VirtQueueElement *elem;
1432    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1433    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1434    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1435    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1436    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1437    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1438
1439    assert(sz >= sizeof(VirtQueueElement));
1440    elem = g_malloc(out_sg_end);
1441    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1442    elem->out_num = out_num;
1443    elem->in_num = in_num;
1444    elem->in_addr = (void *)elem + in_addr_ofs;
1445    elem->out_addr = (void *)elem + out_addr_ofs;
1446    elem->in_sg = (void *)elem + in_sg_ofs;
1447    elem->out_sg = (void *)elem + out_sg_ofs;
1448    return elem;
1449}
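/*
 * The element and its four variable-length arrays share one allocation,
 * laid out (with alignment padding) as
 *
 *   | VirtQueueElement | in_addr[in_num] | out_addr[out_num]
 *   | in_sg[in_num] | out_sg[out_num] |
 *
 * so a single g_free() by the caller releases everything.
 */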
1450
1451static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1452{
1453    unsigned int i, head, max;
1454    VRingMemoryRegionCaches *caches;
1455    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1456    MemoryRegionCache *desc_cache;
1457    int64_t len;
1458    VirtIODevice *vdev = vq->vdev;
1459    VirtQueueElement *elem = NULL;
1460    unsigned out_num, in_num, elem_entries;
1461    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1462    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1463    VRingDesc desc;
1464    int rc;
1465
1466    RCU_READ_LOCK_GUARD();
1467    if (virtio_queue_empty_rcu(vq)) {
1468        goto done;
1469    }
1470    /* Needed after virtio_queue_empty(), see comment in
1471     * virtqueue_num_heads(). */
1472    smp_rmb();
1473
  474    /* When we start there are no input or output descriptors. */
1475    out_num = in_num = elem_entries = 0;
1476
1477    max = vq->vring.num;
1478
1479    if (vq->inuse >= vq->vring.num) {
1480        virtio_error(vdev, "Virtqueue size exceeded");
1481        goto done;
1482    }
1483
1484    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1485        goto done;
1486    }
1487
1488    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1489        vring_set_avail_event(vq, vq->last_avail_idx);
1490    }
1491
1492    i = head;
1493
1494    caches = vring_get_region_caches(vq);
1495    if (!caches) {
1496        virtio_error(vdev, "Region caches not initialized");
1497        goto done;
1498    }
1499
1500    if (caches->desc.len < max * sizeof(VRingDesc)) {
1501        virtio_error(vdev, "Cannot map descriptor ring");
1502        goto done;
1503    }
1504
1505    desc_cache = &caches->desc;
1506    vring_split_desc_read(vdev, &desc, desc_cache, i);
1507    if (desc.flags & VRING_DESC_F_INDIRECT) {
1508        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1509            virtio_error(vdev, "Invalid size for indirect buffer table");
1510            goto done;
1511        }
1512
1513        /* loop over the indirect descriptor table */
1514        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1515                                       desc.addr, desc.len, false);
1516        desc_cache = &indirect_desc_cache;
1517        if (len < desc.len) {
1518            virtio_error(vdev, "Cannot map indirect buffer");
1519            goto done;
1520        }
1521
1522        max = desc.len / sizeof(VRingDesc);
1523        i = 0;
1524        vring_split_desc_read(vdev, &desc, desc_cache, i);
1525    }
1526
1527    /* Collect all the descriptors */
1528    do {
1529        bool map_ok;
1530
1531        if (desc.flags & VRING_DESC_F_WRITE) {
1532            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1533                                        iov + out_num,
1534                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1535                                        desc.addr, desc.len);
1536        } else {
1537            if (in_num) {
1538                virtio_error(vdev, "Incorrect order for descriptors");
1539                goto err_undo_map;
1540            }
1541            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1542                                        VIRTQUEUE_MAX_SIZE, false,
1543                                        desc.addr, desc.len);
1544        }
1545        if (!map_ok) {
1546            goto err_undo_map;
1547        }
1548
1549        /* If we've got too many, that implies a descriptor loop. */
1550        if (++elem_entries > max) {
1551            virtio_error(vdev, "Looped descriptor");
1552            goto err_undo_map;
1553        }
1554
1555        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1556    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1557
1558    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1559        goto err_undo_map;
1560    }
1561
1562    /* Now copy what we have collected and mapped */
1563    elem = virtqueue_alloc_element(sz, out_num, in_num);
1564    elem->index = head;
1565    elem->ndescs = 1;
1566    for (i = 0; i < out_num; i++) {
1567        elem->out_addr[i] = addr[i];
1568        elem->out_sg[i] = iov[i];
1569    }
1570    for (i = 0; i < in_num; i++) {
1571        elem->in_addr[i] = addr[out_num + i];
1572        elem->in_sg[i] = iov[out_num + i];
1573    }
1574
1575    vq->inuse++;
1576
1577    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1578done:
1579    address_space_cache_destroy(&indirect_desc_cache);
1580
1581    return elem;
1582
1583err_undo_map:
1584    virtqueue_undo_map_desc(out_num, in_num, iov);
1585    goto done;
1586}
1587
1588static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1589{
1590    unsigned int i, max;
1591    VRingMemoryRegionCaches *caches;
1592    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1593    MemoryRegionCache *desc_cache;
1594    int64_t len;
1595    VirtIODevice *vdev = vq->vdev;
1596    VirtQueueElement *elem = NULL;
1597    unsigned out_num, in_num, elem_entries;
1598    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1599    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1600    VRingPackedDesc desc;
1601    uint16_t id;
1602    int rc;
1603
1604    RCU_READ_LOCK_GUARD();
1605    if (virtio_queue_packed_empty_rcu(vq)) {
1606        goto done;
1607    }
1608
  610    /* When we start there are no input or output descriptors. */
1610    out_num = in_num = elem_entries = 0;
1611
1612    max = vq->vring.num;
1613
1614    if (vq->inuse >= vq->vring.num) {
1615        virtio_error(vdev, "Virtqueue size exceeded");
1616        goto done;
1617    }
1618
1619    i = vq->last_avail_idx;
1620
1621    caches = vring_get_region_caches(vq);
1622    if (!caches) {
1623        virtio_error(vdev, "Region caches not initialized");
1624        goto done;
1625    }
1626
1627    if (caches->desc.len < max * sizeof(VRingDesc)) {
1628        virtio_error(vdev, "Cannot map descriptor ring");
1629        goto done;
1630    }
1631
1632    desc_cache = &caches->desc;
1633    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1634    id = desc.id;
1635    if (desc.flags & VRING_DESC_F_INDIRECT) {
1636        if (desc.len % sizeof(VRingPackedDesc)) {
1637            virtio_error(vdev, "Invalid size for indirect buffer table");
1638            goto done;
1639        }
1640
1641        /* loop over the indirect descriptor table */
1642        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1643                                       desc.addr, desc.len, false);
1644        desc_cache = &indirect_desc_cache;
1645        if (len < desc.len) {
1646            virtio_error(vdev, "Cannot map indirect buffer");
1647            goto done;
1648        }
1649
1650        max = desc.len / sizeof(VRingPackedDesc);
1651        i = 0;
1652        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1653    }
1654
1655    /* Collect all the descriptors */
1656    do {
1657        bool map_ok;
1658
1659        if (desc.flags & VRING_DESC_F_WRITE) {
1660            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1661                                        iov + out_num,
1662                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1663                                        desc.addr, desc.len);
1664        } else {
1665            if (in_num) {
1666                virtio_error(vdev, "Incorrect order for descriptors");
1667                goto err_undo_map;
1668            }
1669            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1670                                        VIRTQUEUE_MAX_SIZE, false,
1671                                        desc.addr, desc.len);
1672        }
1673        if (!map_ok) {
1674            goto err_undo_map;
1675        }
1676
1677        /* If we've got too many, that implies a descriptor loop. */
1678        if (++elem_entries > max) {
1679            virtio_error(vdev, "Looped descriptor");
1680            goto err_undo_map;
1681        }
1682
1683        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1684                                             desc_cache ==
1685                                             &indirect_desc_cache);
1686    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1687
1688    /* Now copy what we have collected and mapped */
1689    elem = virtqueue_alloc_element(sz, out_num, in_num);
1690    for (i = 0; i < out_num; i++) {
1691        elem->out_addr[i] = addr[i];
1692        elem->out_sg[i] = iov[i];
1693    }
1694    for (i = 0; i < in_num; i++) {
1695        elem->in_addr[i] = addr[out_num + i];
1696        elem->in_sg[i] = iov[out_num + i];
1697    }
1698
1699    elem->index = id;
1700    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1701    vq->last_avail_idx += elem->ndescs;
1702    vq->inuse += elem->ndescs;
1703
1704    if (vq->last_avail_idx >= vq->vring.num) {
1705        vq->last_avail_idx -= vq->vring.num;
1706        vq->last_avail_wrap_counter ^= 1;
1707    }
1708
1709    vq->shadow_avail_idx = vq->last_avail_idx;
1710    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1711
1712    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1713done:
1714    address_space_cache_destroy(&indirect_desc_cache);
1715
1716    return elem;
1717
1718err_undo_map:
1719    virtqueue_undo_map_desc(out_num, in_num, iov);
1720    goto done;
1721}
1722
1723void *virtqueue_pop(VirtQueue *vq, size_t sz)
1724{
1725    if (virtio_device_disabled(vq->vdev)) {
1726        return NULL;
1727    }
1728
1729    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1730        return virtqueue_packed_pop(vq, sz);
1731    } else {
1732        return virtqueue_split_pop(vq, sz);
1733    }
1734}
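
/*
 * Editorial sketch (not part of the original file): the typical shape of a
 * VirtIOHandleOutput callback built on virtqueue_pop()/virtqueue_push().
 * The function name and the processing step are hypothetical; only the
 * pop/push/notify calls are the real API shown above.
 */
static void example_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        /* Pop the next available buffer; NULL means the ring is empty. */
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        /*
         * A real device would read elem->out_sg[0..out_num) here and write
         * its response into elem->in_sg[0..in_num), tracking how many bytes
         * it produced.
         */
        unsigned int written = 0;

        /* Return the buffer to the used ring and free the element. */
        virtqueue_push(vq, elem, written);
        g_free(elem);
    }

    /* One guest notification per batch is enough. */
    virtio_notify(vdev, vq);
}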
1735
1736static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1737{
1738    VRingMemoryRegionCaches *caches;
1739    MemoryRegionCache *desc_cache;
1740    unsigned int dropped = 0;
1741    VirtQueueElement elem = {};
1742    VirtIODevice *vdev = vq->vdev;
1743    VRingPackedDesc desc;
1744
1745    RCU_READ_LOCK_GUARD();
1746
1747    caches = vring_get_region_caches(vq);
1748    if (!caches) {
1749        return 0;
1750    }
1751
1752    desc_cache = &caches->desc;
1753
1754    virtio_queue_set_notification(vq, 0);
1755
1756    while (vq->inuse < vq->vring.num) {
1757        unsigned int idx = vq->last_avail_idx;
1758        /*
1759         * works similarly to virtqueue_pop but does not map buffers
1760         * and does not allocate any memory.
1761         */
1762        vring_packed_desc_read(vdev, &desc, desc_cache,
1763                               vq->last_avail_idx, true);
1764        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1765            break;
1766        }
1767        elem.index = desc.id;
1768        elem.ndescs = 1;
1769        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1770                                               vq->vring.num, &idx, false)) {
1771            ++elem.ndescs;
1772        }
1773        /*
1774         * immediately push the element, nothing to unmap
1775         * as both in_num and out_num are set to 0.
1776         */
1777        virtqueue_push(vq, &elem, 0);
1778        dropped++;
1779        vq->last_avail_idx += elem.ndescs;
1780        if (vq->last_avail_idx >= vq->vring.num) {
1781            vq->last_avail_idx -= vq->vring.num;
1782            vq->last_avail_wrap_counter ^= 1;
1783        }
1784    }
1785
1786    return dropped;
1787}
1788
1789static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1790{
1791    unsigned int dropped = 0;
1792    VirtQueueElement elem = {};
1793    VirtIODevice *vdev = vq->vdev;
1794    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1795
1796    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1797        /* works similarly to virtqueue_pop but does not map buffers
1798         * and does not allocate any memory */
1799        smp_rmb();
1800        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1801            break;
1802        }
1803        vq->inuse++;
1804        vq->last_avail_idx++;
1805        if (fEventIdx) {
1806            vring_set_avail_event(vq, vq->last_avail_idx);
1807        }
1808        /* immediately push the element, nothing to unmap
1809         * as both in_num and out_num are set to 0 */
1810        virtqueue_push(vq, &elem, 0);
1811        dropped++;
1812    }
1813
1814    return dropped;
1815}
1816
1817/* virtqueue_drop_all:
1818 * @vq: The #VirtQueue
1819 * Drops all queued buffers and indicates them to the guest
1820 * as if they are done. Useful when buffers can not be
1821 * processed but must be returned to the guest.
1822 */
1823unsigned int virtqueue_drop_all(VirtQueue *vq)
1824{
1825    struct VirtIODevice *vdev = vq->vdev;
1826
1827    if (virtio_device_disabled(vq->vdev)) {
1828        return 0;
1829    }
1830
1831    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1832        return virtqueue_packed_drop_all(vq);
1833    } else {
1834        return virtqueue_split_drop_all(vq);
1835    }
1836}
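
/*
 * Editorial sketch (not part of the original file): a device whose backend
 * can no longer service requests would typically combine virtqueue_drop_all()
 * with an explicit notification, since dropping only fills the used ring;
 * virtio-net's tx error path follows this pattern.  The function name is
 * hypothetical.
 */
static void example_drop_pending(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtqueue_drop_all(vq)) {
        virtio_notify(vdev, vq);
    }
}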
1837
1838/* Reading and writing a structure directly to QEMUFile is *awful*, but
1839 * it is what QEMU has always done by mistake.  We can change it sooner
1840 * or later by bumping the version number of the affected vm states.
1841 * In the meanwhile, since the in-memory layout of VirtQueueElement
1842 * has changed, we need to marshal to and from the layout that was
1843 * used before the change.
1844 */
1845typedef struct VirtQueueElementOld {
1846    unsigned int index;
1847    unsigned int out_num;
1848    unsigned int in_num;
1849    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1850    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1851    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1852    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1853} VirtQueueElementOld;
1854
1855void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1856{
1857    VirtQueueElement *elem;
1858    VirtQueueElementOld data;
1859    int i;
1860
1861    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1862
1863    /* TODO: teach all callers that this can fail, and return failure instead
1864     * of asserting here.
1865     * This is just one thing (there are probably more) that must be
1866     * fixed before we can allow NDEBUG compilation.
1867     */
1868    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1869    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1870
1871    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1872    elem->index = data.index;
1873
1874    for (i = 0; i < elem->in_num; i++) {
1875        elem->in_addr[i] = data.in_addr[i];
1876    }
1877
1878    for (i = 0; i < elem->out_num; i++) {
1879        elem->out_addr[i] = data.out_addr[i];
1880    }
1881
1882    for (i = 0; i < elem->in_num; i++) {
1883        /* Base is overwritten by virtqueue_map.  */
1884        elem->in_sg[i].iov_base = 0;
1885        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1886    }
1887
1888    for (i = 0; i < elem->out_num; i++) {
1889        /* Base is overwritten by virtqueue_map.  */
1890        elem->out_sg[i].iov_base = 0;
1891        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1892    }
1893
1894    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1895        qemu_get_be32s(f, &elem->ndescs);
1896    }
1897
1898    virtqueue_map(vdev, elem);
1899    return elem;
1900}
1901
1902void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1903                                VirtQueueElement *elem)
1904{
1905    VirtQueueElementOld data;
1906    int i;
1907
1908    memset(&data, 0, sizeof(data));
1909    data.index = elem->index;
1910    data.in_num = elem->in_num;
1911    data.out_num = elem->out_num;
1912
1913    for (i = 0; i < elem->in_num; i++) {
1914        data.in_addr[i] = elem->in_addr[i];
1915    }
1916
1917    for (i = 0; i < elem->out_num; i++) {
1918        data.out_addr[i] = elem->out_addr[i];
1919    }
1920
1921    for (i = 0; i < elem->in_num; i++) {
1922        /* Base is overwritten by virtqueue_map when loading.  Do not
1923         * save it, as it would leak the QEMU address space layout.  */
1924        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1925    }
1926
1927    for (i = 0; i < elem->out_num; i++) {
1928        /* Do not save iov_base as above.  */
1929        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1930    }
1931
1932    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1933        qemu_put_be32s(f, &elem->ndescs);
1934    }
1935
1936    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1937}
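
/*
 * Editorial sketch (not part of the original file): how a device would
 * typically migrate requests that were popped but not yet completed, using
 * the two helpers above.  ExampleReq and the marker byte are hypothetical;
 * the only requirement is that the VirtQueueElement is the first member so
 * that qemu_get_virtqueue_element() can allocate the full request.
 */
typedef struct ExampleReq {
    VirtQueueElement elem;      /* must be first */
    /* device-specific per-request state follows */
} ExampleReq;

static void example_save_inflight(VirtIODevice *vdev, QEMUFile *f,
                                  ExampleReq *req)
{
    qemu_put_sbyte(f, 1);       /* marker: another in-flight request follows */
    qemu_put_virtqueue_element(vdev, f, &req->elem);
}

static ExampleReq *example_load_inflight(VirtIODevice *vdev, QEMUFile *f)
{
    /* Allocates sizeof(ExampleReq) and rebuilds the iovecs via virtqueue_map(). */
    return qemu_get_virtqueue_element(vdev, f, sizeof(ExampleReq));
}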
1938
1939/* virtio device */
1940static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1941{
1942    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1943    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1944
1945    if (virtio_device_disabled(vdev)) {
1946        return;
1947    }
1948
1949    if (k->notify) {
1950        k->notify(qbus->parent, vector);
1951    }
1952}
1953
1954void virtio_update_irq(VirtIODevice *vdev)
1955{
1956    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1957}
1958
1959static int virtio_validate_features(VirtIODevice *vdev)
1960{
1961    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1962
1963    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1964        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1965        return -EFAULT;
1966    }
1967
1968    if (k->validate_features) {
1969        return k->validate_features(vdev);
1970    } else {
1971        return 0;
1972    }
1973}
1974
1975int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1976{
1977    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1978    trace_virtio_set_status(vdev, val);
1979
1980    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1981        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1982            val & VIRTIO_CONFIG_S_FEATURES_OK) {
1983            int ret = virtio_validate_features(vdev);
1984
1985            if (ret) {
1986                return ret;
1987            }
1988        }
1989    }
1990
1991    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1992        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1993        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1994    }
1995
1996    if (k->set_status) {
1997        k->set_status(vdev, val);
1998    }
1999    vdev->status = val;
2000
2001    return 0;
2002}
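
/*
 * Editorial sketch (not part of the original file): a transport's
 * status-register write path is typically built on virtio_set_status().
 * When FEATURES_OK validation fails, the bit is simply never recorded in
 * vdev->status; a write of zero triggers a device reset, mirroring what the
 * MMIO and PCI transports do.  The function name is hypothetical.
 */
static void example_status_write(VirtIODevice *vdev, uint8_t val)
{
    virtio_set_status(vdev, val);

    if (vdev->status == 0) {
        virtio_reset(vdev);
    }
}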
2003
2004static enum virtio_device_endian virtio_default_endian(void)
2005{
2006    if (target_words_bigendian()) {
2007        return VIRTIO_DEVICE_ENDIAN_BIG;
2008    } else {
2009        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2010    }
2011}
2012
2013static enum virtio_device_endian virtio_current_cpu_endian(void)
2014{
2015    if (cpu_virtio_is_big_endian(current_cpu)) {
2016        return VIRTIO_DEVICE_ENDIAN_BIG;
2017    } else {
2018        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2019    }
2020}
2021
2022void virtio_reset(void *opaque)
2023{
2024    VirtIODevice *vdev = opaque;
2025    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2026    int i;
2027
2028    virtio_set_status(vdev, 0);
2029    if (current_cpu) {
2030        /* Guest initiated reset */
2031        vdev->device_endian = virtio_current_cpu_endian();
2032    } else {
2033        /* System reset */
2034        vdev->device_endian = virtio_default_endian();
2035    }
2036
2037    if (k->reset) {
2038        k->reset(vdev);
2039    }
2040
2041    vdev->start_on_kick = false;
2042    vdev->started = false;
2043    vdev->broken = false;
2044    vdev->guest_features = 0;
2045    vdev->queue_sel = 0;
2046    vdev->status = 0;
2047    vdev->disabled = false;
2048    qatomic_set(&vdev->isr, 0);
2049    vdev->config_vector = VIRTIO_NO_VECTOR;
2050    virtio_notify_vector(vdev, vdev->config_vector);
2051
2052    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2053        vdev->vq[i].vring.desc = 0;
2054        vdev->vq[i].vring.avail = 0;
2055        vdev->vq[i].vring.used = 0;
2056        vdev->vq[i].last_avail_idx = 0;
2057        vdev->vq[i].shadow_avail_idx = 0;
2058        vdev->vq[i].used_idx = 0;
2059        vdev->vq[i].last_avail_wrap_counter = true;
2060        vdev->vq[i].shadow_avail_wrap_counter = true;
2061        vdev->vq[i].used_wrap_counter = true;
2062        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2063        vdev->vq[i].signalled_used = 0;
2064        vdev->vq[i].signalled_used_valid = false;
2065        vdev->vq[i].notification = true;
2066        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2067        vdev->vq[i].inuse = 0;
2068        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2069    }
2070}
2071
2072uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2073{
2074    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2075    uint8_t val;
2076
2077    if (addr + sizeof(val) > vdev->config_len) {
2078        return (uint32_t)-1;
2079    }
2080
2081    k->get_config(vdev, vdev->config);
2082
2083    val = ldub_p(vdev->config + addr);
2084    return val;
2085}
2086
2087uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2088{
2089    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2090    uint16_t val;
2091
2092    if (addr + sizeof(val) > vdev->config_len) {
2093        return (uint32_t)-1;
2094    }
2095
2096    k->get_config(vdev, vdev->config);
2097
2098    val = lduw_p(vdev->config + addr);
2099    return val;
2100}
2101
2102uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2103{
2104    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2105    uint32_t val;
2106
2107    if (addr + sizeof(val) > vdev->config_len) {
2108        return (uint32_t)-1;
2109    }
2110
2111    k->get_config(vdev, vdev->config);
2112
2113    val = ldl_p(vdev->config + addr);
2114    return val;
2115}
2116
2117void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2118{
2119    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2120    uint8_t val = data;
2121
2122    if (addr + sizeof(val) > vdev->config_len) {
2123        return;
2124    }
2125
2126    stb_p(vdev->config + addr, val);
2127
2128    if (k->set_config) {
2129        k->set_config(vdev, vdev->config);
2130    }
2131}
2132
2133void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2134{
2135    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2136    uint16_t val = data;
2137
2138    if (addr + sizeof(val) > vdev->config_len) {
2139        return;
2140    }
2141
2142    stw_p(vdev->config + addr, val);
2143
2144    if (k->set_config) {
2145        k->set_config(vdev, vdev->config);
2146    }
2147}
2148
2149void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2150{
2151    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2152    uint32_t val = data;
2153
2154    if (addr + sizeof(val) > vdev->config_len) {
2155        return;
2156    }
2157
2158    stl_p(vdev->config + addr, val);
2159
2160    if (k->set_config) {
2161        k->set_config(vdev, vdev->config);
2162    }
2163}
2164
2165uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2166{
2167    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2168    uint8_t val;
2169
2170    if (addr + sizeof(val) > vdev->config_len) {
2171        return (uint32_t)-1;
2172    }
2173
2174    k->get_config(vdev, vdev->config);
2175
2176    val = ldub_p(vdev->config + addr);
2177    return val;
2178}
2179
2180uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2181{
2182    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2183    uint16_t val;
2184
2185    if (addr + sizeof(val) > vdev->config_len) {
2186        return (uint32_t)-1;
2187    }
2188
2189    k->get_config(vdev, vdev->config);
2190
2191    val = lduw_le_p(vdev->config + addr);
2192    return val;
2193}
2194
2195uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2196{
2197    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2198    uint32_t val;
2199
2200    if (addr + sizeof(val) > vdev->config_len) {
2201        return (uint32_t)-1;
2202    }
2203
2204    k->get_config(vdev, vdev->config);
2205
2206    val = ldl_le_p(vdev->config + addr);
2207    return val;
2208}
2209
2210void virtio_config_modern_writeb(VirtIODevice *vdev,
2211                                 uint32_t addr, uint32_t data)
2212{
2213    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2214    uint8_t val = data;
2215
2216    if (addr + sizeof(val) > vdev->config_len) {
2217        return;
2218    }
2219
2220    stb_p(vdev->config + addr, val);
2221
2222    if (k->set_config) {
2223        k->set_config(vdev, vdev->config);
2224    }
2225}
2226
2227void virtio_config_modern_writew(VirtIODevice *vdev,
2228                                 uint32_t addr, uint32_t data)
2229{
2230    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2231    uint16_t val = data;
2232
2233    if (addr + sizeof(val) > vdev->config_len) {
2234        return;
2235    }
2236
2237    stw_le_p(vdev->config + addr, val);
2238
2239    if (k->set_config) {
2240        k->set_config(vdev, vdev->config);
2241    }
2242}
2243
2244void virtio_config_modern_writel(VirtIODevice *vdev,
2245                                 uint32_t addr, uint32_t data)
2246{
2247    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2248    uint32_t val = data;
2249
2250    if (addr + sizeof(val) > vdev->config_len) {
2251        return;
2252    }
2253
2254    stl_le_p(vdev->config + addr, val);
2255
2256    if (k->set_config) {
2257        k->set_config(vdev, vdev->config);
2258    }
2259}
2260
2261void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2262{
2263    if (!vdev->vq[n].vring.num) {
2264        return;
2265    }
2266    vdev->vq[n].vring.desc = addr;
2267    virtio_queue_update_rings(vdev, n);
2268}
2269
2270hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2271{
2272    return vdev->vq[n].vring.desc;
2273}
2274
2275void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2276                            hwaddr avail, hwaddr used)
2277{
2278    if (!vdev->vq[n].vring.num) {
2279        return;
2280    }
2281    vdev->vq[n].vring.desc = desc;
2282    vdev->vq[n].vring.avail = avail;
2283    vdev->vq[n].vring.used = used;
2284    virtio_init_region_cache(vdev, n);
2285}
2286
2287void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2288{
2289    /* Don't allow guest to flip queue between existent and
2290     * nonexistent states, or to set it to an invalid size.
2291     */
2292    if (!!num != !!vdev->vq[n].vring.num ||
2293        num > VIRTQUEUE_MAX_SIZE ||
2294        num < 0) {
2295        return;
2296    }
2297    vdev->vq[n].vring.num = num;
2298}
2299
2300VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2301{
2302    return QLIST_FIRST(&vdev->vector_queues[vector]);
2303}
2304
2305VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2306{
2307    return QLIST_NEXT(vq, node);
2308}
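
/*
 * Editorial note (not part of the original file): transports walk every queue
 * bound to one MSI-X vector with the pair of helpers above, e.g.
 *
 *     VirtQueue *vq;
 *     for (vq = virtio_vector_first_queue(vdev, vector); vq;
 *          vq = virtio_vector_next_queue(vq)) {
 *         ...configure the guest notifier / irqfd for vq...
 *     }
 */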
2309
2310int virtio_queue_get_num(VirtIODevice *vdev, int n)
2311{
2312    return vdev->vq[n].vring.num;
2313}
2314
2315int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2316{
2317    return vdev->vq[n].vring.num_default;
2318}
2319
2320int virtio_get_num_queues(VirtIODevice *vdev)
2321{
2322    int i;
2323
2324    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2325        if (!virtio_queue_get_num(vdev, i)) {
2326            break;
2327        }
2328    }
2329
2330    return i;
2331}
2332
2333void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2334{
2335    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2336    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2337
2338    /* virtio-1 compliant devices cannot change the alignment */
2339    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2340        error_report("tried to modify queue alignment for virtio-1 device");
2341        return;
2342    }
2343    /* Check that the transport told us it was going to do this
2344     * (so a buggy transport will immediately assert rather than
2345     * silently failing to migrate this state)
2346     */
2347    assert(k->has_variable_vring_alignment);
2348
2349    if (align) {
2350        vdev->vq[n].vring.align = align;
2351        virtio_queue_update_rings(vdev, n);
2352    }
2353}
2354
2355static void virtio_queue_notify_vq(VirtQueue *vq)
2356{
2357    if (vq->vring.desc && vq->handle_output) {
2358        VirtIODevice *vdev = vq->vdev;
2359
2360        if (unlikely(vdev->broken)) {
2361            return;
2362        }
2363
2364        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2365        vq->handle_output(vdev, vq);
2366
2367        if (unlikely(vdev->start_on_kick)) {
2368            virtio_set_started(vdev, true);
2369        }
2370    }
2371}
2372
2373void virtio_queue_notify(VirtIODevice *vdev, int n)
2374{
2375    VirtQueue *vq = &vdev->vq[n];
2376
2377    if (unlikely(!vq->vring.desc || vdev->broken)) {
2378        return;
2379    }
2380
2381    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2382    if (vq->host_notifier_enabled) {
2383        event_notifier_set(&vq->host_notifier);
2384    } else if (vq->handle_output) {
2385        vq->handle_output(vdev, vq);
2386
2387        if (unlikely(vdev->start_on_kick)) {
2388            virtio_set_started(vdev, true);
2389        }
2390    }
2391}
2392
2393uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2394{
2395    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2396        VIRTIO_NO_VECTOR;
2397}
2398
2399void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2400{
2401    VirtQueue *vq = &vdev->vq[n];
2402
2403    if (n < VIRTIO_QUEUE_MAX) {
2404        if (vdev->vector_queues &&
2405            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2406            QLIST_REMOVE(vq, node);
2407        }
2408        vdev->vq[n].vector = vector;
2409        if (vdev->vector_queues &&
2410            vector != VIRTIO_NO_VECTOR) {
2411            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2412        }
2413    }
2414}
2415
2416VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2417                            VirtIOHandleOutput handle_output)
2418{
2419    int i;
2420
2421    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2422        if (vdev->vq[i].vring.num == 0)
2423            break;
2424    }
2425
2426    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2427        abort();
2428
2429    vdev->vq[i].vring.num = queue_size;
2430    vdev->vq[i].vring.num_default = queue_size;
2431    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2432    vdev->vq[i].handle_output = handle_output;
2433    vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2434
2435    return &vdev->vq[i];
2436}
2437
2438void virtio_delete_queue(VirtQueue *vq)
2439{
2440    vq->vring.num = 0;
2441    vq->vring.num_default = 0;
2442    vq->handle_output = NULL;
2443    g_free(vq->used_elems);
2444    vq->used_elems = NULL;
2445    virtio_virtqueue_reset_region_cache(vq);
2446}
2447
2448void virtio_del_queue(VirtIODevice *vdev, int n)
2449{
2450    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2451        abort();
2452    }
2453
2454    virtio_delete_queue(&vdev->vq[n]);
2455}
2456
2457static void virtio_set_isr(VirtIODevice *vdev, int value)
2458{
2459    uint8_t old = qatomic_read(&vdev->isr);
2460
2461    /* Do not write ISR if it does not change, so that its cacheline remains
2462     * shared in the common case where the guest does not read it.
2463     */
2464    if ((old & value) != value) {
2465        qatomic_or(&vdev->isr, value);
2466    }
2467}
2468
2469/* Called within rcu_read_lock(). */
2470static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2471{
2472    uint16_t old, new;
2473    bool v;
2474    /* We need to expose used array entries before checking used event. */
2475    smp_mb();
2476    /* Always notify when queue is empty (if the feature was acknowledged) */
2477    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2478        !vq->inuse && virtio_queue_empty(vq)) {
2479        return true;
2480    }
2481
2482    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2483        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2484    }
2485
2486    v = vq->signalled_used_valid;
2487    vq->signalled_used_valid = true;
2488    old = vq->signalled_used;
2489    new = vq->signalled_used = vq->used_idx;
2490    return !v || vring_need_event(vring_get_used_event(vq), new, old);
2491}
2492
2493static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2494                                    uint16_t off_wrap, uint16_t new,
2495                                    uint16_t old)
2496{
2497    int off = off_wrap & ~(1 << 15);
2498
2499    if (wrap != off_wrap >> 15) {
2500        off -= vq->vring.num;
2501    }
2502
2503    return vring_need_event(off, new, old);
2504}
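
/*
 * Editorial note (not part of the original file): off_wrap packs the driver's
 * event offset in bits 0..14 and its expected wrap state in bit 15.  When the
 * device's current wrap counter differs from that bit, the event offset still
 * refers to the previous pass over the ring, so it is shifted down by the
 * ring size before the usual vring_need_event() "did new cross the event
 * index since old" check is applied.
 */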
2505
2506/* Called within rcu_read_lock(). */
2507static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2508{
2509    VRingPackedDescEvent e;
2510    uint16_t old, new;
2511    bool v;
2512    VRingMemoryRegionCaches *caches;
2513
2514    caches = vring_get_region_caches(vq);
2515    if (!caches) {
2516        return false;
2517    }
2518
2519    vring_packed_event_read(vdev, &caches->avail, &e);
2520
2521    old = vq->signalled_used;
2522    new = vq->signalled_used = vq->used_idx;
2523    v = vq->signalled_used_valid;
2524    vq->signalled_used_valid = true;
2525
2526    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2527        return false;
2528    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2529        return true;
2530    }
2531
2532    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2533                                         e.off_wrap, new, old);
2534}
2535
2536/* Called within rcu_read_lock().  */
2537static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2538{
2539    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2540        return virtio_packed_should_notify(vdev, vq);
2541    } else {
2542        return virtio_split_should_notify(vdev, vq);
2543    }
2544}
2545
2546void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2547{
2548    WITH_RCU_READ_LOCK_GUARD() {
2549        if (!virtio_should_notify(vdev, vq)) {
2550            return;
2551        }
2552    }
2553
2554    trace_virtio_notify_irqfd(vdev, vq);
2555
2556    /*
2557     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2558     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2559     * incorrectly polling this bit during crashdump and hibernation
2560     * in MSI mode, causing a hang if this bit is never updated.
2561     * Recent releases of Windows do not really shut down, but rather
2562     * log out and hibernate to make the next startup faster.  Hence,
2563     * this manifested as a more serious hang during shutdown with MSI enabled.
2564     *
2565     * The next driver release, from 2016, fixed this problem, so working
2566     * around it is not a must, but it's easy to do, so let's do it here.
2567     *
2568     * Note: it's safe to update ISR from any thread as it was switched
2569     * to an atomic operation.
2570     */
2571    virtio_set_isr(vq->vdev, 0x1);
2572    event_notifier_set(&vq->guest_notifier);
2573}
2574
2575static void virtio_irq(VirtQueue *vq)
2576{
2577    virtio_set_isr(vq->vdev, 0x1);
2578    virtio_notify_vector(vq->vdev, vq->vector);
2579}
2580
2581void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2582{
2583    WITH_RCU_READ_LOCK_GUARD() {
2584        if (!virtio_should_notify(vdev, vq)) {
2585            return;
2586        }
2587    }
2588
2589    trace_virtio_notify(vdev, vq);
2590    virtio_irq(vq);
2591}
2592
2593void virtio_notify_config(VirtIODevice *vdev)
2594{
2595    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2596        return;
2597
2598    virtio_set_isr(vdev, 0x3);
2599    vdev->generation++;
2600    virtio_notify_vector(vdev, vdev->config_vector);
2601}
2602
2603static bool virtio_device_endian_needed(void *opaque)
2604{
2605    VirtIODevice *vdev = opaque;
2606
2607    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2608    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2609        return vdev->device_endian != virtio_default_endian();
2610    }
2611    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2612    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2613}
2614
2615static bool virtio_64bit_features_needed(void *opaque)
2616{
2617    VirtIODevice *vdev = opaque;
2618
2619    return (vdev->host_features >> 32) != 0;
2620}
2621
2622static bool virtio_virtqueue_needed(void *opaque)
2623{
2624    VirtIODevice *vdev = opaque;
2625
2626    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2627}
2628
2629static bool virtio_packed_virtqueue_needed(void *opaque)
2630{
2631    VirtIODevice *vdev = opaque;
2632
2633    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2634}
2635
2636static bool virtio_ringsize_needed(void *opaque)
2637{
2638    VirtIODevice *vdev = opaque;
2639    int i;
2640
2641    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2642        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2643            return true;
2644        }
2645    }
2646    return false;
2647}
2648
2649static bool virtio_extra_state_needed(void *opaque)
2650{
2651    VirtIODevice *vdev = opaque;
2652    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2653    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2654
2655    return k->has_extra_state &&
2656        k->has_extra_state(qbus->parent);
2657}
2658
2659static bool virtio_broken_needed(void *opaque)
2660{
2661    VirtIODevice *vdev = opaque;
2662
2663    return vdev->broken;
2664}
2665
2666static bool virtio_started_needed(void *opaque)
2667{
2668    VirtIODevice *vdev = opaque;
2669
2670    return vdev->started;
2671}
2672
2673static bool virtio_disabled_needed(void *opaque)
2674{
2675    VirtIODevice *vdev = opaque;
2676
2677    return vdev->disabled;
2678}
2679
2680static const VMStateDescription vmstate_virtqueue = {
2681    .name = "virtqueue_state",
2682    .version_id = 1,
2683    .minimum_version_id = 1,
2684    .fields = (VMStateField[]) {
2685        VMSTATE_UINT64(vring.avail, struct VirtQueue),
2686        VMSTATE_UINT64(vring.used, struct VirtQueue),
2687        VMSTATE_END_OF_LIST()
2688    }
2689};
2690
2691static const VMStateDescription vmstate_packed_virtqueue = {
2692    .name = "packed_virtqueue_state",
2693    .version_id = 1,
2694    .minimum_version_id = 1,
2695    .fields = (VMStateField[]) {
2696        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2697        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2698        VMSTATE_UINT16(used_idx, struct VirtQueue),
2699        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2700        VMSTATE_UINT32(inuse, struct VirtQueue),
2701        VMSTATE_END_OF_LIST()
2702    }
2703};
2704
2705static const VMStateDescription vmstate_virtio_virtqueues = {
2706    .name = "virtio/virtqueues",
2707    .version_id = 1,
2708    .minimum_version_id = 1,
2709    .needed = &virtio_virtqueue_needed,
2710    .fields = (VMStateField[]) {
2711        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2712                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2713        VMSTATE_END_OF_LIST()
2714    }
2715};
2716
2717static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2718    .name = "virtio/packed_virtqueues",
2719    .version_id = 1,
2720    .minimum_version_id = 1,
2721    .needed = &virtio_packed_virtqueue_needed,
2722    .fields = (VMStateField[]) {
2723        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2724                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2725        VMSTATE_END_OF_LIST()
2726    }
2727};
2728
2729static const VMStateDescription vmstate_ringsize = {
2730    .name = "ringsize_state",
2731    .version_id = 1,
2732    .minimum_version_id = 1,
2733    .fields = (VMStateField[]) {
2734        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2735        VMSTATE_END_OF_LIST()
2736    }
2737};
2738
2739static const VMStateDescription vmstate_virtio_ringsize = {
2740    .name = "virtio/ringsize",
2741    .version_id = 1,
2742    .minimum_version_id = 1,
2743    .needed = &virtio_ringsize_needed,
2744    .fields = (VMStateField[]) {
2745        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2746                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2747        VMSTATE_END_OF_LIST()
2748    }
2749};
2750
2751static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2752                           const VMStateField *field)
2753{
2754    VirtIODevice *vdev = pv;
2755    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2756    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2757
2758    if (!k->load_extra_state) {
2759        return -1;
2760    } else {
2761        return k->load_extra_state(qbus->parent, f);
2762    }
2763}
2764
2765static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2766                           const VMStateField *field, JSONWriter *vmdesc)
2767{
2768    VirtIODevice *vdev = pv;
2769    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2770    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2771
2772    k->save_extra_state(qbus->parent, f);
2773    return 0;
2774}
2775
2776static const VMStateInfo vmstate_info_extra_state = {
2777    .name = "virtqueue_extra_state",
2778    .get = get_extra_state,
2779    .put = put_extra_state,
2780};
2781
2782static const VMStateDescription vmstate_virtio_extra_state = {
2783    .name = "virtio/extra_state",
2784    .version_id = 1,
2785    .minimum_version_id = 1,
2786    .needed = &virtio_extra_state_needed,
2787    .fields = (VMStateField[]) {
2788        {
2789            .name         = "extra_state",
2790            .version_id   = 0,
2791            .field_exists = NULL,
2792            .size         = 0,
2793            .info         = &vmstate_info_extra_state,
2794            .flags        = VMS_SINGLE,
2795            .offset       = 0,
2796        },
2797        VMSTATE_END_OF_LIST()
2798    }
2799};
2800
2801static const VMStateDescription vmstate_virtio_device_endian = {
2802    .name = "virtio/device_endian",
2803    .version_id = 1,
2804    .minimum_version_id = 1,
2805    .needed = &virtio_device_endian_needed,
2806    .fields = (VMStateField[]) {
2807        VMSTATE_UINT8(device_endian, VirtIODevice),
2808        VMSTATE_END_OF_LIST()
2809    }
2810};
2811
2812static const VMStateDescription vmstate_virtio_64bit_features = {
2813    .name = "virtio/64bit_features",
2814    .version_id = 1,
2815    .minimum_version_id = 1,
2816    .needed = &virtio_64bit_features_needed,
2817    .fields = (VMStateField[]) {
2818        VMSTATE_UINT64(guest_features, VirtIODevice),
2819        VMSTATE_END_OF_LIST()
2820    }
2821};
2822
2823static const VMStateDescription vmstate_virtio_broken = {
2824    .name = "virtio/broken",
2825    .version_id = 1,
2826    .minimum_version_id = 1,
2827    .needed = &virtio_broken_needed,
2828    .fields = (VMStateField[]) {
2829        VMSTATE_BOOL(broken, VirtIODevice),
2830        VMSTATE_END_OF_LIST()
2831    }
2832};
2833
2834static const VMStateDescription vmstate_virtio_started = {
2835    .name = "virtio/started",
2836    .version_id = 1,
2837    .minimum_version_id = 1,
2838    .needed = &virtio_started_needed,
2839    .fields = (VMStateField[]) {
2840        VMSTATE_BOOL(started, VirtIODevice),
2841        VMSTATE_END_OF_LIST()
2842    }
2843};
2844
2845static const VMStateDescription vmstate_virtio_disabled = {
2846    .name = "virtio/disabled",
2847    .version_id = 1,
2848    .minimum_version_id = 1,
2849    .needed = &virtio_disabled_needed,
2850    .fields = (VMStateField[]) {
2851        VMSTATE_BOOL(disabled, VirtIODevice),
2852        VMSTATE_END_OF_LIST()
2853    }
2854};
2855
2856static const VMStateDescription vmstate_virtio = {
2857    .name = "virtio",
2858    .version_id = 1,
2859    .minimum_version_id = 1,
2860    .fields = (VMStateField[]) {
2861        VMSTATE_END_OF_LIST()
2862    },
2863    .subsections = (const VMStateDescription*[]) {
2864        &vmstate_virtio_device_endian,
2865        &vmstate_virtio_64bit_features,
2866        &vmstate_virtio_virtqueues,
2867        &vmstate_virtio_ringsize,
2868        &vmstate_virtio_broken,
2869        &vmstate_virtio_extra_state,
2870        &vmstate_virtio_started,
2871        &vmstate_virtio_packed_virtqueues,
2872        &vmstate_virtio_disabled,
2873        NULL
2874    }
2875};
2876
2877int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2878{
2879    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2880    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2881    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2882    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2883    int i;
2884
2885    if (k->save_config) {
2886        k->save_config(qbus->parent, f);
2887    }
2888
2889    qemu_put_8s(f, &vdev->status);
2890    qemu_put_8s(f, &vdev->isr);
2891    qemu_put_be16s(f, &vdev->queue_sel);
2892    qemu_put_be32s(f, &guest_features_lo);
2893    qemu_put_be32(f, vdev->config_len);
2894    qemu_put_buffer(f, vdev->config, vdev->config_len);
2895
2896    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2897        if (vdev->vq[i].vring.num == 0)
2898            break;
2899    }
2900
2901    qemu_put_be32(f, i);
2902
2903    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2904        if (vdev->vq[i].vring.num == 0)
2905            break;
2906
2907        qemu_put_be32(f, vdev->vq[i].vring.num);
2908        if (k->has_variable_vring_alignment) {
2909            qemu_put_be32(f, vdev->vq[i].vring.align);
2910        }
2911        /*
2912         * Save desc now, the rest of the ring addresses are saved in
2913         * subsections for VIRTIO-1 devices.
2914         */
2915        qemu_put_be64(f, vdev->vq[i].vring.desc);
2916        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2917        if (k->save_queue) {
2918            k->save_queue(qbus->parent, i, f);
2919        }
2920    }
2921
2922    if (vdc->save != NULL) {
2923        vdc->save(vdev, f);
2924    }
2925
2926    if (vdc->vmsd) {
2927        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2928        if (ret) {
2929            return ret;
2930        }
2931    }
2932
2933    /* Subsections */
2934    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2935}
2936
2937/* A wrapper for use as a VMState .put function */
2938static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2939                              const VMStateField *field, JSONWriter *vmdesc)
2940{
2941    return virtio_save(VIRTIO_DEVICE(opaque), f);
2942}
2943
2944/* A wrapper for use as a VMState .get function */
2945static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2946                             const VMStateField *field)
2947{
2948    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2949    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2950
2951    return virtio_load(vdev, f, dc->vmsd->version_id);
2952}
2953
2954const VMStateInfo  virtio_vmstate_info = {
2955    .name = "virtio",
2956    .get = virtio_device_get,
2957    .put = virtio_device_put,
2958};
2959
2960static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2961{
2962    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2963    bool bad = (val & ~(vdev->host_features)) != 0;
2964
2965    val &= vdev->host_features;
2966    if (k->set_features) {
2967        k->set_features(vdev, val);
2968    }
2969    vdev->guest_features = val;
2970    return bad ? -1 : 0;
2971}
2972
2973int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2974{
2975    int ret;
2976    /*
2977     * The driver must not attempt to set features after feature negotiation
2978     * has finished.
2979     */
2980    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2981        return -EINVAL;
2982    }
2983    ret = virtio_set_features_nocheck(vdev, val);
2984    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2985        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2986        int i;
2987        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2988            if (vdev->vq[i].vring.num != 0) {
2989                virtio_init_region_cache(vdev, i);
2990            }
2991        }
2992    }
2993    if (!ret) {
2994        if (!virtio_device_started(vdev, vdev->status) &&
2995            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2996            vdev->start_on_kick = true;
2997        }
2998    }
2999    return ret;
3000}
3001
3002size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
3003                                      uint64_t host_features)
3004{
3005    size_t config_size = 0;
3006    int i;
3007
3008    for (i = 0; feature_sizes[i].flags != 0; i++) {
3009        if (host_features & feature_sizes[i].flags) {
3010            config_size = MAX(feature_sizes[i].end, config_size);
3011        }
3012    }
3013
3014    return config_size;
3015}
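
/*
 * Editorial sketch (not part of the original file): a device lists, per
 * optional feature bit, the end offset of the config field that the feature
 * adds, and lets virtio_feature_get_config_size() pick the largest one that
 * is actually offered.  The struct, feature bit and table below are made up
 * for the example.
 */
struct virtio_example_config {
    uint32_t base;      /* always present */
    uint32_t extra;     /* present only with VIRTIO_EXAMPLE_F_EXTRA */
};

#define VIRTIO_EXAMPLE_F_EXTRA 40

static const VirtIOFeature example_feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_EXAMPLE_F_EXTRA,
     .end = offsetof(struct virtio_example_config, extra) + sizeof(uint32_t)},
    {}
};

/*
 * Usage:
 *     config_size = virtio_feature_get_config_size(example_feature_sizes,
 *                                                  vdev->host_features);
 */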
3016
3017int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3018{
3019    int i, ret;
3020    int32_t config_len;
3021    uint32_t num;
3022    uint32_t features;
3023    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3024    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3025    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3026
3027    /*
3028     * We poison the endianness to ensure it does not get used before
3029     * subsections have been loaded.
3030     */
3031    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3032
3033    if (k->load_config) {
3034        ret = k->load_config(qbus->parent, f);
3035        if (ret)
3036            return ret;
3037    }
3038
3039    qemu_get_8s(f, &vdev->status);
3040    qemu_get_8s(f, &vdev->isr);
3041    qemu_get_be16s(f, &vdev->queue_sel);
3042    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3043        return -1;
3044    }
3045    qemu_get_be32s(f, &features);
3046
3047    /*
3048     * Temporarily set guest_features low bits - needed by
3049     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3050     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3051     *
3052     * Note: devices should always test host features in future - don't create
3053     * new dependencies like this.
3054     */
3055    vdev->guest_features = features;
3056
3057    config_len = qemu_get_be32(f);
3058
3059    /*
3060     * There are cases where the incoming config can be bigger or smaller
3061     * than what we have; so load what we have space for, and skip
3062     * any excess that's in the stream.
3063     */
3064    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3065
3066    while (config_len > vdev->config_len) {
3067        qemu_get_byte(f);
3068        config_len--;
3069    }
3070
3071    num = qemu_get_be32(f);
3072
3073    if (num > VIRTIO_QUEUE_MAX) {
3074        error_report("Invalid number of virtqueues: 0x%x", num);
3075        return -1;
3076    }
3077
3078    for (i = 0; i < num; i++) {
3079        vdev->vq[i].vring.num = qemu_get_be32(f);
3080        if (k->has_variable_vring_alignment) {
3081            vdev->vq[i].vring.align = qemu_get_be32(f);
3082        }
3083        vdev->vq[i].vring.desc = qemu_get_be64(f);
3084        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3085        vdev->vq[i].signalled_used_valid = false;
3086        vdev->vq[i].notification = true;
3087
3088        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3089            error_report("VQ %d address 0x0 "
3090                         "inconsistent with Host index 0x%x",
3091                         i, vdev->vq[i].last_avail_idx);
3092            return -1;
3093        }
3094        if (k->load_queue) {
3095            ret = k->load_queue(qbus->parent, i, f);
3096            if (ret)
3097                return ret;
3098        }
3099    }
3100
3101    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3102
3103    if (vdc->load != NULL) {
3104        ret = vdc->load(vdev, f, version_id);
3105        if (ret) {
3106            return ret;
3107        }
3108    }
3109
3110    if (vdc->vmsd) {
3111        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3112        if (ret) {
3113            return ret;
3114        }
3115    }
3116
3117    /* Subsections */
3118    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3119    if (ret) {
3120        return ret;
3121    }
3122
3123    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3124        vdev->device_endian = virtio_default_endian();
3125    }
3126
3127    if (virtio_64bit_features_needed(vdev)) {
3128        /*
3129         * Subsection load filled vdev->guest_features.  Run them
3130         * through virtio_set_features to sanity-check them against
3131         * host_features.
3132         */
3133        uint64_t features64 = vdev->guest_features;
3134        if (virtio_set_features_nocheck(vdev, features64) < 0) {
3135            error_report("Features 0x%" PRIx64 " unsupported. "
3136                         "Allowed features: 0x%" PRIx64,
3137                         features64, vdev->host_features);
3138            return -1;
3139        }
3140    } else {
3141        if (virtio_set_features_nocheck(vdev, features) < 0) {
3142            error_report("Features 0x%x unsupported. "
3143                         "Allowed features: 0x%" PRIx64,
3144                         features, vdev->host_features);
3145            return -1;
3146        }
3147    }
3148
3149    if (!virtio_device_started(vdev, vdev->status) &&
3150        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3151        vdev->start_on_kick = true;
3152    }
3153
3154    RCU_READ_LOCK_GUARD();
3155    for (i = 0; i < num; i++) {
3156        if (vdev->vq[i].vring.desc) {
3157            uint16_t nheads;
3158
3159            /*
3160             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3161             * only the region cache needs to be set up.  Legacy devices need
3162             * to calculate used and avail ring addresses based on the desc
3163             * address.
3164             */
3165            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3166                virtio_init_region_cache(vdev, i);
3167            } else {
3168                virtio_queue_update_rings(vdev, i);
3169            }
3170
3171            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3172                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3173                vdev->vq[i].shadow_avail_wrap_counter =
3174                                        vdev->vq[i].last_avail_wrap_counter;
3175                continue;
3176            }
3177
3178            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3179            /* Check it isn't doing strange things with descriptor numbers. */
3180            if (nheads > vdev->vq[i].vring.num) {
3181                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3182                             "inconsistent with Host index 0x%x: delta 0x%x",
3183                             i, vdev->vq[i].vring.num,
3184                             vring_avail_idx(&vdev->vq[i]),
3185                             vdev->vq[i].last_avail_idx, nheads);
3186                vdev->vq[i].used_idx = 0;
3187                vdev->vq[i].shadow_avail_idx = 0;
3188                vdev->vq[i].inuse = 0;
3189                continue;
3190            }
3191            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3192            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3193
3194            /*
3195             * Some devices migrate VirtQueueElements that have been popped
3196             * from the avail ring but not yet returned to the used ring.
3197             * Since max ring size < UINT16_MAX it's safe to use modulo
3198             * UINT16_MAX + 1 subtraction.
3199             */
3200            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3201                                vdev->vq[i].used_idx);
3202            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3203                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3204                             "used_idx 0x%x",
3205                             i, vdev->vq[i].vring.num,
3206                             vdev->vq[i].last_avail_idx,
3207                             vdev->vq[i].used_idx);
3208                return -1;
3209            }
3210        }
3211    }
3212
3213    if (vdc->post_load) {
3214        ret = vdc->post_load(vdev);
3215        if (ret) {
3216            return ret;
3217        }
3218    }
3219
3220    return 0;
3221}
3222
3223void virtio_cleanup(VirtIODevice *vdev)
3224{
3225    qemu_del_vm_change_state_handler(vdev->vmstate);
3226}
3227
3228static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3229{
3230    VirtIODevice *vdev = opaque;
3231    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3232    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3233    bool backend_run = running && virtio_device_started(vdev, vdev->status);
3234    vdev->vm_running = running;
3235
3236    if (backend_run) {
3237        virtio_set_status(vdev, vdev->status);
3238    }
3239
3240    if (k->vmstate_change) {
3241        k->vmstate_change(qbus->parent, backend_run);
3242    }
3243
3244    if (!backend_run) {
3245        virtio_set_status(vdev, vdev->status);
3246    }
3247}
3248
3249void virtio_instance_init_common(Object *proxy_obj, void *data,
3250                                 size_t vdev_size, const char *vdev_name)
3251{
3252    DeviceState *vdev = data;
3253
3254    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3255                                       vdev_size, vdev_name, &error_abort,
3256                                       NULL);
3257    qdev_alias_all_properties(vdev, proxy_obj);
3258}
3259
3260void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3261{
3262    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3263    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3264    int i;
3265    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3266
3267    if (nvectors) {
3268        vdev->vector_queues =
3269            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3270    }
3271
3272    vdev->start_on_kick = false;
3273    vdev->started = false;
3274    vdev->vhost_started = false;
3275    vdev->device_id = device_id;
3276    vdev->status = 0;
3277    qatomic_set(&vdev->isr, 0);
3278    vdev->queue_sel = 0;
3279    vdev->config_vector = VIRTIO_NO_VECTOR;
3280    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3281    vdev->vm_running = runstate_is_running();
3282    vdev->broken = false;
3283    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3284        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3285        vdev->vq[i].vdev = vdev;
3286        vdev->vq[i].queue_index = i;
3287        vdev->vq[i].host_notifier_enabled = false;
3288    }
3289
3290    vdev->name = virtio_id_to_name(device_id);
3291    vdev->config_len = config_size;
3292    if (vdev->config_len) {
3293        vdev->config = g_malloc0(config_size);
3294    } else {
3295        vdev->config = NULL;
3296    }
3297    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3298            virtio_vmstate_change, vdev);
3299    vdev->device_endian = virtio_default_endian();
3300    vdev->use_guest_notifier_mask = true;
3301}
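
/*
 * Editorial sketch (not part of the original file): the usual realize-time
 * sequence for a device built on virtio_init()/virtio_add_queue().
 * VIRTIO_ID_EXAMPLE, the config struct and the handler are hypothetical
 * (the latter two are sketched earlier in this listing).
 */
static void example_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);

    /* Register the device ID and allocate its config space. */
    virtio_init(vdev, VIRTIO_ID_EXAMPLE, sizeof(struct virtio_example_config));

    /* One request queue with 128 entries, serviced by the handler. */
    virtio_add_queue(vdev, 128, example_handle_output);
}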
3302
3303/*
3304 * Only devices that have already been around prior to defining the virtio
3305 * standard support legacy mode; this includes devices not specified in the
3306 * standard. All newer devices conform to the virtio standard only.
3307 */
3308bool virtio_legacy_allowed(VirtIODevice *vdev)
3309{
3310    switch (vdev->device_id) {
3311    case VIRTIO_ID_NET:
3312    case VIRTIO_ID_BLOCK:
3313    case VIRTIO_ID_CONSOLE:
3314    case VIRTIO_ID_RNG:
3315    case VIRTIO_ID_BALLOON:
3316    case VIRTIO_ID_RPMSG:
3317    case VIRTIO_ID_SCSI:
3318    case VIRTIO_ID_9P:
3319    case VIRTIO_ID_RPROC_SERIAL:
3320    case VIRTIO_ID_CAIF:
3321        return true;
3322    default:
3323        return false;
3324    }
3325}
3326
3327bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3328{
3329    return vdev->disable_legacy_check;
3330}
3331
3332hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3333{
3334    return vdev->vq[n].vring.desc;
3335}
3336
3337bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3338{
3339    return virtio_queue_get_desc_addr(vdev, n) != 0;
3340}
3341
3342bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3343{
3344    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3345    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3346
3347    if (k->queue_enabled) {
3348        return k->queue_enabled(qbus->parent, n);
3349    }
3350    return virtio_queue_enabled_legacy(vdev, n);
3351}
3352
3353hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3354{
3355    return vdev->vq[n].vring.avail;
3356}
3357
3358hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3359{
3360    return vdev->vq[n].vring.used;
3361}
3362
3363hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3364{
3365    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3366}
3367
3368hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3369{
3370    int s;
3371
3372    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3373        return sizeof(struct VRingPackedDescEvent);
3374    }
3375
3376    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3377    return offsetof(VRingAvail, ring) +
3378        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3379}
3380
3381hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3382{
3383    int s;
3384
3385    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3386        return sizeof(struct VRingPackedDescEvent);
3387    }
3388
3389    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3390    return offsetof(VRingUsed, ring) +
3391        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3392}
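
/*
 * Editorial worked example (not part of the original file): for a 256-entry
 * split ring with VIRTIO_RING_F_EVENT_IDX negotiated, the avail ring occupies
 * 4 + 2 * 256 + 2 = 518 bytes and the used ring 4 + 8 * 256 + 2 = 2054 bytes;
 * for a packed ring only the 4-byte driver/device event structures are
 * accounted for here.
 */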
3393
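/*
 * Editorial note (not part of the original file): for packed rings the value
 * exchanged with transports and vhost backends is a packed 32-bit quantity:
 * bits 0..14 hold last_avail_idx and bit 15 its wrap counter, while bits
 * 16..30 hold used_idx and bit 31 the used-ring wrap counter.  The helpers
 * below encode and decode this layout.
 */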
3394static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3395                                                           int n)
3396{
3397    unsigned int avail, used;
3398
3399    avail = vdev->vq[n].last_avail_idx;
3400    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3401
3402    used = vdev->vq[n].used_idx;
3403    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3404
3405    return avail | used << 16;
3406}
3407
3408static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3409                                                      int n)
3410{
3411    return vdev->vq[n].last_avail_idx;
3412}
3413
3414unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3415{
3416    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3417        return virtio_queue_packed_get_last_avail_idx(vdev, n);
3418    } else {
3419        return virtio_queue_split_get_last_avail_idx(vdev, n);
3420    }
3421}
3422
3423static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3424                                                   int n, unsigned int idx)
3425{
3426    struct VirtQueue *vq = &vdev->vq[n];
3427
3428    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3429    vq->last_avail_wrap_counter =
3430        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3431    idx >>= 16;
3432    vq->used_idx = idx & 0x7fff;
3433    vq->used_wrap_counter = !!(idx & 0x8000);
3434}
3435
3436static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3437                                                  int n, unsigned int idx)
3438{
3439    vdev->vq[n].last_avail_idx = idx;
3440    vdev->vq[n].shadow_avail_idx = idx;
3441}
3442
3443void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3444                                     unsigned int idx)
3445{
3446    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3447        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3448    } else {
3449        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3450    }
3451}
3452
3453static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3454                                                       int n)
3455{
3456    /* The packed ring keeps no index in shared memory to restore from */
3457    return;
3458}
3459
3460static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3461                                                      int n)
3462{
3463    RCU_READ_LOCK_GUARD();
3464    if (vdev->vq[n].vring.desc) {
3465        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3466        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3467    }
3468}
3469
3470void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3471{
3472    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3473        virtio_queue_packed_restore_last_avail_idx(vdev, n);
3474    } else {
3475        virtio_queue_split_restore_last_avail_idx(vdev, n);
3476    }
3477}
3478
3479static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3480{
3481    /* used idx was updated through set_last_avail_idx() */
3482    return;
3483}
3484
3485static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3486{
3487    RCU_READ_LOCK_GUARD();
3488    if (vdev->vq[n].vring.desc) {
3489        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3490    }
3491}
3492
3493void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3494{
3495    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3496        virtio_queue_packed_update_used_idx(vdev, n);
3497    } else {
3498        virtio_queue_split_update_used_idx(vdev, n);
3499    }
3500}
3501
3502void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3503{
3504    vdev->vq[n].signalled_used_valid = false;
3505}
3506
3507VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3508{
3509    return vdev->vq + n;
3510}
3511
3512uint16_t virtio_get_queue_index(VirtQueue *vq)
3513{
3514    return vq->queue_index;
3515}
3516
3517static void virtio_queue_guest_notifier_read(EventNotifier *n)
3518{
3519    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3520    if (event_notifier_test_and_clear(n)) {
3521        virtio_irq(vq);
3522    }
3523}
3524
3525void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3526                                                bool with_irqfd)
3527{
3528    if (assign && !with_irqfd) {
3529        event_notifier_set_handler(&vq->guest_notifier,
3530                                   virtio_queue_guest_notifier_read);
3531    } else {
3532        event_notifier_set_handler(&vq->guest_notifier, NULL);
3533    }
3534    if (!assign) {
3535        /* Test and clear notifier before closing it,
3536         * in case poll callback didn't have time to run. */
3537        virtio_queue_guest_notifier_read(&vq->guest_notifier);
3538    }
3539}
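
/*
 * When the guest notifier is bound to a KVM irqfd (with_irqfd), the kernel
 * injects the interrupt directly once the eventfd is signalled, so no
 * userspace read handler is installed; otherwise
 * virtio_queue_guest_notifier_read() runs from the event loop and raises
 * the interrupt via virtio_irq().
 */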
3540
3541EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3542{
3543    return &vq->guest_notifier;
3544}
3545
3546static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3547{
3548    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3549
3550    virtio_queue_set_notification(vq, 0);
3551}
3552
3553static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3554{
3555    EventNotifier *n = opaque;
3556    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3557
3558    return vq->vring.desc && !virtio_queue_empty(vq);
3559}
3560
3561static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3562{
3563    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3564
3565    virtio_queue_notify_vq(vq);
3566}
3567
3568static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3569{
3570    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3571
3572    /* Caller polls once more after this to catch requests that race with us */
3573    virtio_queue_set_notification(vq, 1);
3574}
3575
3576void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3577{
3578    aio_set_event_notifier(ctx, &vq->host_notifier, true,
3579                           virtio_queue_host_notifier_read,
3580                           virtio_queue_host_notifier_aio_poll,
3581                           virtio_queue_host_notifier_aio_poll_ready);
3582    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3583                                virtio_queue_host_notifier_aio_poll_begin,
3584                                virtio_queue_host_notifier_aio_poll_end);
3585}
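
/*
 * With polling attached, the AioContext invokes the callbacks above in a
 * fixed sequence: poll_begin() suppresses guest->host notifications while
 * the event loop busy-polls, poll() checks whether the ring has work
 * without blocking, poll_ready() runs the queue handler when it does, and
 * poll_end() re-enables notifications before the loop sleeps again (the
 * caller polls one last time to close the race noted in poll_end).
 */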
3586
3587/*
3588 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
3589 * this for rx virtqueues and similar cases where the virtqueue handler
3590 * function does not pop all elements. When the virtqueue is left non-empty,
3591 * polling consumes CPU cycles and should not be used.
3592 */
3593void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
3594{
3595    aio_set_event_notifier(ctx, &vq->host_notifier, true,
3596                           virtio_queue_host_notifier_read,
3597                           NULL, NULL);
3598}
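
/*
 * A usage sketch (hypothetical names, not from this file): a device whose
 * handler drains the queue completely can keep polling, while a queue that
 * is deliberately left non-empty (e.g. an rx queue waiting for incoming
 * data) should use the no_poll variant:
 *
 *     virtio_queue_aio_attach_host_notifier(tx_vq, iothread_ctx);
 *     virtio_queue_aio_attach_host_notifier_no_poll(rx_vq, iothread_ctx);
 *
 * where tx_vq, rx_vq and iothread_ctx stand in for the device's virtqueues
 * and its IOThread's AioContext.
 */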
3599
3600void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3601{
3602    aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
3603    /* Test and clear notifier after disabling event,
3604     * in case poll callback didn't have time to run. */
3605    virtio_queue_host_notifier_read(&vq->host_notifier);
3606}
3607
3608void virtio_queue_host_notifier_read(EventNotifier *n)
3609{
3610    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3611    if (event_notifier_test_and_clear(n)) {
3612        virtio_queue_notify_vq(vq);
3613    }
3614}
3615
3616EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3617{
3618    return &vq->host_notifier;
3619}
3620
3621void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3622{
3623    vq->host_notifier_enabled = enabled;
3624}
3625
3626int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3627                                      MemoryRegion *mr, bool assign)
3628{
3629    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3630    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3631
3632    if (k->set_host_notifier_mr) {
3633        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3634    }
3635
3636    return -1;
3637}
3638
3639void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3640{
3641    g_free(vdev->bus_name);
3642    vdev->bus_name = g_strdup(bus_name);
3643}
3644
3645void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3646{
3647    va_list ap;
3648
3649    va_start(ap, fmt);
3650    error_vreport(fmt, ap);
3651    va_end(ap);
3652
3653    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3654        vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3655        virtio_notify_config(vdev);
3656    }
3657
3658    vdev->broken = true;
3659}
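
/*
 * A usage sketch (hypothetical request-handler snippet, not from this
 * file): device code reports guest-visible protocol violations through
 * virtio_error() and then discards the offending element, e.g.
 *
 *     if (elem->out_num < 1) {
 *         virtio_error(vdev, "%s: request without header", __func__);
 *         virtqueue_detach_element(vq, elem, 0);
 *         g_free(elem);
 *         return;
 *     }
 *
 * For VERSION_1 devices this sets VIRTIO_CONFIG_S_NEEDS_RESET and notifies
 * the guest; legacy guests have no standard error channel, so the device is
 * only marked broken.
 */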
3660
3661static void virtio_memory_listener_commit(MemoryListener *listener)
3662{
3663    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3664    int i;
3665
3666    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3667        if (vdev->vq[i].vring.num == 0) {
3668            break;
3669        }
3670        virtio_init_region_cache(vdev, i);
3671    }
3672}
3673
3674static void virtio_device_realize(DeviceState *dev, Error **errp)
3675{
3676    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3677    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3678    Error *err = NULL;
3679
3680    /* Devices should either use vmsd or the load/save methods */
3681    assert(!vdc->vmsd || !vdc->load);
3682
3683    if (vdc->realize != NULL) {
3684        vdc->realize(dev, &err);
3685        if (err != NULL) {
3686            error_propagate(errp, err);
3687            return;
3688        }
3689    }
3690
3691    virtio_bus_device_plugged(vdev, &err);
3692    if (err != NULL) {
3693        error_propagate(errp, err);
3694        vdc->unrealize(dev);
3695        return;
3696    }
3697
3698    vdev->listener.commit = virtio_memory_listener_commit;
3699    vdev->listener.name = "virtio";
3700    memory_listener_register(&vdev->listener, vdev->dma_as);
3701}
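
/*
 * Ordering note: the generic realize above runs the device's vdc->realize()
 * first, then plugs the device into the transport, and only then registers
 * the memory listener so the vring region caches track DMA address-space
 * changes; virtio_device_unrealize() below tears these down in reverse.  A
 * minimal vdc->realize() sketch (my_handle_request is hypothetical and the
 * virtio_init() argument list is assumed from the virtio_device_names[] era
 * of this file):
 *
 *     static void my_device_realize(DeviceState *dev, Error **errp)
 *     {
 *         VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 *
 *         virtio_init(vdev, VIRTIO_ID_BLOCK, sizeof(struct virtio_blk_config));
 *         virtio_add_queue(vdev, 128, my_handle_request);
 *     }
 */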
3702
3703static void virtio_device_unrealize(DeviceState *dev)
3704{
3705    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3706    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3707
3708    memory_listener_unregister(&vdev->listener);
3709    virtio_bus_device_unplugged(vdev);
3710
3711    if (vdc->unrealize != NULL) {
3712        vdc->unrealize(dev);
3713    }
3714
3715    g_free(vdev->bus_name);
3716    vdev->bus_name = NULL;
3717}
3718
3719static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3720{
3721    int i;
3722    if (!vdev->vq) {
3723        return;
3724    }
3725
3726    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3727        if (vdev->vq[i].vring.num == 0) {
3728            break;
3729        }
3730        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3731    }
3732    g_free(vdev->vq);
3733}
3734
3735static void virtio_device_instance_finalize(Object *obj)
3736{
3737    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3738
3739    virtio_device_free_virtqueues(vdev);
3740
3741    g_free(vdev->config);
3742    g_free(vdev->vector_queues);
3743}
3744
3745static Property virtio_properties[] = {
3746    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3747    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3748    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3749    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3750                     disable_legacy_check, false),
3751    DEFINE_PROP_END_OF_LIST(),
3752};
3753
3754static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3755{
3756    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3757    int i, n, r, err;
3758
3759    /*
3760     * Batch all the host notifiers in a single transaction to avoid
3761     * quadratic time complexity in address_space_update_ioeventfds().
3762     */
3763    memory_region_transaction_begin();
3764    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3765        VirtQueue *vq = &vdev->vq[n];
3766        if (!virtio_queue_get_num(vdev, n)) {
3767            continue;
3768        }
3769        r = virtio_bus_set_host_notifier(qbus, n, true);
3770        if (r < 0) {
3771            err = r;
3772            goto assign_error;
3773        }
3774        event_notifier_set_handler(&vq->host_notifier,
3775                                   virtio_queue_host_notifier_read);
3776    }
3777
3778    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3779        /* Kick right away to begin processing requests already in vring */
3780        VirtQueue *vq = &vdev->vq[n];
3781        if (!vq->vring.num) {
3782            continue;
3783        }
3784        event_notifier_set(&vq->host_notifier);
3785    }
3786    memory_region_transaction_commit();
3787    return 0;
3788
3789assign_error:
3790    i = n; /* save n for a second iteration after transaction is committed. */
3791    while (--n >= 0) {
3792        VirtQueue *vq = &vdev->vq[n];
3793        if (!virtio_queue_get_num(vdev, n)) {
3794            continue;
3795        }
3796
3797        event_notifier_set_handler(&vq->host_notifier, NULL);
3798        r = virtio_bus_set_host_notifier(qbus, n, false);
3799        assert(r >= 0);
3800    }
3801    /*
3802     * The transaction expects the ioeventfds to be open when it
3803     * commits. Do it now, before the cleanup loop.
3804     */
3805    memory_region_transaction_commit();
3806
3807    while (--i >= 0) {
3808        if (!virtio_queue_get_num(vdev, i)) {
3809            continue;
3810        }
3811        virtio_bus_cleanup_host_notifier(qbus, i);
3812    }
3813    return err;
3814}
3815
3816int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3817{
3818    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3819    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3820
3821    return virtio_bus_start_ioeventfd(vbus);
3822}
3823
3824static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3825{
3826    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3827    int n, r;
3828
3829    /*
3830     * Batch all the host notifiers in a single transaction to avoid
3831     * quadratic time complexity in address_space_update_ioeventfds().
3832     */
3833    memory_region_transaction_begin();
3834    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3835        VirtQueue *vq = &vdev->vq[n];
3836
3837        if (!virtio_queue_get_num(vdev, n)) {
3838            continue;
3839        }
3840        event_notifier_set_handler(&vq->host_notifier, NULL);
3841        r = virtio_bus_set_host_notifier(qbus, n, false);
3842        assert(r >= 0);
3843    }
3844    /*
3845     * The transaction expects the ioeventfds to be open when it
3846     * commits. Do it now, before the cleanup loop.
3847     */
3848    memory_region_transaction_commit();
3849
3850    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3851        if (!virtio_queue_get_num(vdev, n)) {
3852            continue;
3853        }
3854        virtio_bus_cleanup_host_notifier(qbus, n);
3855    }
3856}
3857
3858int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3859{
3860    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3861    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3862
3863    return virtio_bus_grab_ioeventfd(vbus);
3864}
3865
3866void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3867{
3868    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3869    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3870
3871    virtio_bus_release_ioeventfd(vbus);
3872}
3873
3874static void virtio_device_class_init(ObjectClass *klass, void *data)
3875{
3876    /* Set the default callbacks and properties here. */
3877    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3878    DeviceClass *dc = DEVICE_CLASS(klass);
3879
3880    dc->realize = virtio_device_realize;
3881    dc->unrealize = virtio_device_unrealize;
3882    dc->bus_type = TYPE_VIRTIO_BUS;
3883    device_class_set_props(dc, virtio_properties);
3884    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3885    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3886
3887    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3888}
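
/*
 * Subclasses that run their virtqueues in an IOThread (dataplane) typically
 * override the hooks installed above; a hedged sketch with illustrative
 * names:
 *
 *     static void my_virtio_class_init(ObjectClass *klass, void *data)
 *     {
 *         VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 *
 *         vdc->realize = my_device_realize;
 *         vdc->start_ioeventfd = my_dataplane_start;
 *         vdc->stop_ioeventfd = my_dataplane_stop;
 *     }
 *
 * Devices that leave these alone fall back to
 * virtio_device_start_ioeventfd_impl()/virtio_device_stop_ioeventfd_impl().
 */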
3889
3890bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3891{
3892    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3893    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3894
3895    return virtio_bus_ioeventfd_enabled(vbus);
3896}
3897
3898static const TypeInfo virtio_device_info = {
3899    .name = TYPE_VIRTIO_DEVICE,
3900    .parent = TYPE_DEVICE,
3901    .instance_size = sizeof(VirtIODevice),
3902    .class_init = virtio_device_class_init,
3903    .instance_finalize = virtio_device_instance_finalize,
3904    .abstract = true,
3905    .class_size = sizeof(VirtioDeviceClass),
3906};
3907
3908static void virtio_register_types(void)
3909{
3910    type_register_static(&virtio_device_info);
3911}
3912
3913type_init(virtio_register_types)
3914