qemu/hw/virtio/virtio-balloon.c
/*
 * Virtio Balloon Device
 *
 * Copyright IBM, Corp. 2008
 * Copyright (C) 2011 Red Hat, Inc.
 * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/module.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio.h"
#include "hw/mem/pc-dimm.h"
#include "sysemu/balloon.h"
#include "hw/virtio/virtio-balloon.h"
#include "exec/address-spaces.h"
#include "qapi/error.h"
#include "qapi/qapi-events-misc.h"
#include "qapi/visitor.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "migration/misc.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"

#define BALLOON_PAGE_SIZE  (1 << VIRTIO_BALLOON_PFN_SHIFT)

typedef struct PartiallyBalloonedPage {
    ram_addr_t base_gpa;
    unsigned long *bitmap;
} PartiallyBalloonedPage;

static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp)
{
    if (!pbp->bitmap) {
        return;
    }
    g_free(pbp->bitmap);
    pbp->bitmap = NULL;
}

static void virtio_balloon_pbp_alloc(PartiallyBalloonedPage *pbp,
                                     ram_addr_t base_gpa,
                                     long subpages)
{
    pbp->base_gpa = base_gpa;
    pbp->bitmap = bitmap_new(subpages);
}

static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp,
                                       ram_addr_t base_gpa)
{
    return pbp->base_gpa == base_gpa;
}

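/*
 * Discard one BALLOON_PAGE_SIZE guest page that the guest has placed in the
 * balloon.  If the backing RAMBlock uses a larger host page size, track the
 * ballooned sub-pages in *pbp and only discard the host page once all of its
 * sub-pages have been ballooned.
 */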
static void balloon_inflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr mr_offset,
                                 PartiallyBalloonedPage *pbp)
{
    void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
    ram_addr_t rb_offset, rb_aligned_offset, base_gpa;
    RAMBlock *rb;
    size_t rb_page_size;
    int subpages;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &rb_offset);
    rb_page_size = qemu_ram_pagesize(rb);

    if (rb_page_size == BALLOON_PAGE_SIZE) {
        /* Easy case */

        ram_block_discard_range(rb, rb_offset, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */
        return;
    }

    /* Hard case
     *
     * We've put a piece of a larger host page into the balloon - we
     * need to keep track until we have a whole host page to
     * discard
     */
    warn_report_once(
"Balloon used with backing page size > 4kiB, this may not be reliable");

    rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size);
    subpages = rb_page_size / BALLOON_PAGE_SIZE;
    base_gpa = memory_region_get_ram_addr(mr) + mr_offset -
               (rb_offset - rb_aligned_offset);

    if (pbp->bitmap && !virtio_balloon_pbp_matches(pbp, base_gpa)) {
        /* We've partially ballooned part of a host page, but now
         * we're trying to balloon part of a different one.  Too hard,
         * give up on the old partial page */
        virtio_balloon_pbp_free(pbp);
    }

    if (!pbp->bitmap) {
        virtio_balloon_pbp_alloc(pbp, base_gpa, subpages);
    }

    set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE,
            pbp->bitmap);

    if (bitmap_full(pbp->bitmap, subpages)) {
        /* We've accumulated a full host page, we can actually discard
         * it now */

        ram_block_discard_range(rb, rb_aligned_offset, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */
        virtio_balloon_pbp_free(pbp);
    }
}

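/*
 * Bring a previously ballooned page back into use.  We can only hint at host
 * page granularity, so MADV_WILLNEED the whole host page containing the
 * deflated guest page.
 */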
static void balloon_deflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr mr_offset)
{
    void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
    ram_addr_t rb_offset;
    RAMBlock *rb;
    size_t rb_page_size;
    void *host_addr;
    int ret;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &rb_offset);
    rb_page_size = qemu_ram_pagesize(rb);

    host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1));

    /* When a page is deflated, we hint the whole host page it lives
     * on, since we can't do anything smaller */
    ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED);
    if (ret != 0) {
        warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s",
                    strerror(errno));
        /* Otherwise ignore, failing to page hint shouldn't be fatal */
    }
}

static const char *balloon_stat_names[] = {
   [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
   [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
   [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
   [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
   [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
   [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
   [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory",
   [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches",
   [VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc",
   [VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail",
   [VIRTIO_BALLOON_S_NR] = NULL
};

/*
 * reset_stats - Mark all items in the stats array as unset
 *
 * This function needs to be called at device initialization and before
 * updating to a set of newly-generated stats.  This will ensure that no
 * stale values stick around in case the guest reports a subset of the supported
 * statistics.
 */
static inline void reset_stats(VirtIOBalloon *dev)
{
    int i;
    for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
}

static bool balloon_stats_supported(const VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
}

static bool balloon_stats_enabled(const VirtIOBalloon *s)
{
    return s->stats_poll_interval > 0;
}

static void balloon_stats_destroy_timer(VirtIOBalloon *s)
{
    if (balloon_stats_enabled(s)) {
        timer_del(s->stats_timer);
        timer_free(s->stats_timer);
        s->stats_timer = NULL;
        s->stats_poll_interval = 0;
    }
}

static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
{
    timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
}

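/*
 * Timer callback for guest-stats polling: return the buffer the guest left in
 * the stats queue so the guest refills it with fresh statistics, or simply
 * re-arm the timer if no buffer is available yet.
 */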
static void balloon_stats_poll_cb(void *opaque)
{
    VirtIOBalloon *s = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) {
        /* re-schedule */
        balloon_stats_change_timer(s, s->stats_poll_interval);
        return;
    }

    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
    virtio_notify(vdev, s->svq);
    g_free(s->stats_vq_elem);
    s->stats_vq_elem = NULL;
}

static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
{
    Error *err = NULL;
    VirtIOBalloon *s = opaque;
    int i;

    visit_start_struct(v, name, NULL, 0, &err);
    if (err) {
        goto out;
    }
    visit_type_int(v, "last-update", &s->stats_last_update, &err);
    if (err) {
        goto out_end;
    }

    visit_start_struct(v, "stats", NULL, 0, &err);
    if (err) {
        goto out_end;
    }
    for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
        visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err);
        if (err) {
            goto out_nested;
        }
    }
    visit_check_struct(v, &err);
out_nested:
    visit_end_struct(v, NULL);

    if (!err) {
        visit_check_struct(v, &err);
    }
out_end:
    visit_end_struct(v, NULL);
out:
    error_propagate(errp, err);
}

static void balloon_stats_get_poll_interval(Object *obj, Visitor *v,
                                            const char *name, void *opaque,
                                            Error **errp)
{
    VirtIOBalloon *s = opaque;
    visit_type_int(v, name, &s->stats_poll_interval, errp);
}

static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
                                            const char *name, void *opaque,
                                            Error **errp)
{
    VirtIOBalloon *s = opaque;
    Error *local_err = NULL;
    int64_t value;

    visit_type_int(v, name, &value, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (value < 0) {
        error_setg(errp, "timer value must be greater than zero");
        return;
    }

    if (value > UINT32_MAX) {
        error_setg(errp, "timer value is too big");
        return;
    }

    if (value == s->stats_poll_interval) {
        return;
    }

    if (value == 0) {
        /* timer=0 disables the timer */
        balloon_stats_destroy_timer(s);
        return;
    }

    if (balloon_stats_enabled(s)) {
        /* timer interval change */
        s->stats_poll_interval = value;
        balloon_stats_change_timer(s, value);
        return;
    }

    /* create a new timer */
    g_assert(s->stats_timer == NULL);
    s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s);
    s->stats_poll_interval = value;
    balloon_stats_change_timer(s, 0);
}

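/*
 * Handle requests on the inflate (ivq) and deflate (dvq) queues.  Each
 * element carries an array of 32-bit page frame numbers; every PFN names one
 * BALLOON_PAGE_SIZE page to inflate or deflate.
 */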
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem;
    MemoryRegionSection section;

    for (;;) {
        PartiallyBalloonedPage pbp = {};
        size_t offset = 0;
        uint32_t pfn;

        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) {
            unsigned int p = virtio_ldl_p(vdev, &pfn);
            hwaddr pa;

            pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT;
            offset += 4;

            section = memory_region_find(get_system_memory(), pa,
                                         BALLOON_PAGE_SIZE);
            if (!section.mr) {
                trace_virtio_balloon_bad_addr(pa);
                continue;
            }
            if (!memory_region_is_ram(section.mr) ||
                memory_region_is_rom(section.mr) ||
                memory_region_is_romd(section.mr)) {
                trace_virtio_balloon_bad_addr(pa);
                memory_region_unref(section.mr);
                continue;
            }

            trace_virtio_balloon_handle_output(memory_region_name(section.mr),
                                               pa);
            if (!qemu_balloon_is_inhibited()) {
                if (vq == s->ivq) {
                    balloon_inflate_page(s, section.mr,
                                         section.offset_within_region, &pbp);
                } else if (vq == s->dvq) {
                    balloon_deflate_page(s, section.mr, section.offset_within_region);
                } else {
                    g_assert_not_reached();
                }
            }
            memory_region_unref(section.mr);
        }

        virtqueue_push(vq, elem, offset);
        virtio_notify(vdev, vq);
        g_free(elem);
        virtio_balloon_pbp_free(&pbp);
    }
}

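/*
 * Receive a buffer of (tag, value) statistics pairs from the guest.  The
 * element is kept rather than pushed back immediately, so the polling timer
 * can later return it to the guest as a request for updated stats.
 */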
static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem;
    VirtIOBalloonStat stat;
    size_t offset = 0;
    qemu_timeval tv;

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        goto out;
    }

    if (s->stats_vq_elem != NULL) {
        /* This should never happen if the driver follows the spec. */
        virtqueue_push(vq, s->stats_vq_elem, 0);
        virtio_notify(vdev, vq);
        g_free(s->stats_vq_elem);
    }

    s->stats_vq_elem = elem;

    /* Initialize the stats to get rid of any stale values.  This is only
     * needed to handle the case where a guest supports fewer stats than it
     * used to (ie. it has booted into an old kernel).
     */
    reset_stats(s);

    while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat))
           == sizeof(stat)) {
        uint16_t tag = virtio_tswap16(vdev, stat.tag);
        uint64_t val = virtio_tswap64(vdev, stat.val);

        offset += sizeof(stat);
        if (tag < VIRTIO_BALLOON_S_NR)
            s->stats[tag] = val;
    }
    s->stats_vq_offset = offset;

    if (qemu_gettimeofday(&tv) < 0) {
        warn_report("%s: failed to get time of day", __func__);
        goto out;
    }

    s->stats_last_update = tv.tv_sec;

out:
    if (balloon_stats_enabled(s)) {
        balloon_stats_change_timer(s, s->stats_poll_interval);
    }
}

static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
                                               VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    qemu_bh_schedule(s->free_page_bh);
}

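/*
 * Process one element from the free page hint vq.  Called with
 * free_page_lock held.  An out buffer carries the command id that starts or
 * stops reporting; in buffers carry the guest's free page hints.  Returns
 * false when the vq is empty or an error occurred.
 */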
static bool get_free_page_hints(VirtIOBalloon *dev)
{
    VirtQueueElement *elem;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtQueue *vq = dev->free_page_vq;
    bool ret = true;

    while (dev->block_iothread) {
        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
    }

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        return false;
    }

    if (elem->out_num) {
        uint32_t id;
        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
                                 &id, sizeof(id));

        virtio_tswap32s(vdev, &id);
        if (unlikely(size != sizeof(id))) {
            virtio_error(vdev, "received an incorrect cmd id");
            ret = false;
            goto out;
        }
        if (id == dev->free_page_report_cmd_id) {
            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
        } else {
            /*
             * Stop the optimization only when it has started. This
             * avoids a stale stop sign for the previous command.
             */
            if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
                dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
            }
        }
    }

    if (elem->in_num) {
        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
                                      elem->in_sg[0].iov_len);
        }
    }

out:
    virtqueue_push(vq, elem, 1);
    g_free(elem);
    return ret;
}

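/*
 * Bottom half run in the iothread: drain the free page hint vq, and keep
 * polling it while the guest is actively reporting.
 */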
static void virtio_ballloon_get_free_page_hints(void *opaque)
{
    VirtIOBalloon *dev = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtQueue *vq = dev->free_page_vq;
    bool continue_to_get_hints;

    do {
        qemu_mutex_lock(&dev->free_page_lock);
        virtio_queue_set_notification(vq, 0);
        continue_to_get_hints = get_free_page_hints(dev);
        qemu_mutex_unlock(&dev->free_page_lock);
        virtio_notify(vdev, vq);
      /*
       * Start to poll the vq once the reporting started. Otherwise, continue
       * only when there are entries on the vq, which need to be given back.
       */
    } while (continue_to_get_hints ||
             dev->free_page_report_status == FREE_PAGE_REPORT_S_START);
    virtio_queue_set_notification(vq, 1);
}

static bool virtio_balloon_free_page_support(void *opaque)
{
    VirtIOBalloon *s = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
}

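/*
 * Ask the guest to start free page reporting: pick the next command id
 * (wrapping back to CMD_ID_MIN at UINT_MAX) and expose it to the guest
 * through a config update.
 */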
static void virtio_balloon_free_page_start(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    /* For the stop and copy phase, we don't need to start the optimization */
    if (!vdev->vm_running) {
        return;
    }

    if (s->free_page_report_cmd_id == UINT_MAX) {
        s->free_page_report_cmd_id =
                       VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
    } else {
        s->free_page_report_cmd_id++;
    }

    s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
    virtio_notify_config(vdev);
}

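/*
 * Tell the guest to stop free page reporting.  The status is changed under
 * free_page_lock, then the guest is notified through a config update.
 */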
static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) {
        /*
         * The lock also guarantees us that the
         * virtio_ballloon_get_free_page_hints exits after the
         * free_page_report_status is set to S_STOP.
         */
        qemu_mutex_lock(&s->free_page_lock);
        /*
         * The guest hasn't done the reporting, so host sends a notification
         * to the guest to actively stop the reporting.
         */
        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
        qemu_mutex_unlock(&s->free_page_lock);
        virtio_notify_config(vdev);
    }
}

static void virtio_balloon_free_page_done(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
    virtio_notify_config(vdev);
}

static int
virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data)
{
    VirtIOBalloon *dev = container_of(n, VirtIOBalloon,
                                      free_page_report_notify);
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    PrecopyNotifyData *pnd = data;

    if (!virtio_balloon_free_page_support(dev)) {
        /*
         * This is an optimization provided to migration, so just return 0 to
         * have the normal migration process not affected when this feature is
         * not supported.
         */
        return 0;
    }

    switch (pnd->reason) {
    case PRECOPY_NOTIFY_SETUP:
        precopy_enable_free_page_optimization();
        break;
    case PRECOPY_NOTIFY_COMPLETE:
    case PRECOPY_NOTIFY_CLEANUP:
    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
        virtio_balloon_free_page_stop(dev);
        break;
    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
        if (vdev->vm_running) {
            virtio_balloon_free_page_start(dev);
        } else {
            virtio_balloon_free_page_done(dev);
        }
        break;
    default:
        virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
    }

    return 0;
}

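/*
 * The size of the config space exposed to the guest depends on which host
 * features are enabled and on the qemu-4-0-config-size compatibility quirk
 * (see the property definition below).
 */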
static size_t virtio_balloon_config_size(VirtIOBalloon *s)
{
    uint64_t features = s->host_features;

    if (s->qemu_4_0_config_size) {
        return sizeof(struct virtio_balloon_config);
    }
    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
        return sizeof(struct virtio_balloon_config);
    }
    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
        return offsetof(struct virtio_balloon_config, poison_val);
    }
    return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
}

static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    struct virtio_balloon_config config = {};

    config.num_pages = cpu_to_le32(dev->num_pages);
    config.actual = cpu_to_le32(dev->actual);

    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(dev->free_page_report_cmd_id);
    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
    }

    trace_virtio_balloon_get_config(config.num_pages, config.actual);
    memcpy(config_data, &config, virtio_balloon_config_size(dev));
}

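/* Collect every realized pc-dimm device below @obj into the GSList at @opaque. */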
static int build_dimm_list(Object *obj, void *opaque)
{
    GSList **list = opaque;

    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
        DeviceState *dev = DEVICE(obj);
        if (dev->realized) { /* only realized DIMMs matter */
            *list = g_slist_prepend(*list, dev);
        }
    }

    object_child_foreach(obj, build_dimm_list, opaque);
    return 0;
}

static ram_addr_t get_current_ram_size(void)
{
    GSList *list = NULL, *item;
    ram_addr_t size = ram_size;

    build_dimm_list(qdev_get_machine(), &list);
    for (item = list; item; item = g_slist_next(item)) {
        Object *obj = OBJECT(item->data);
        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
                                            &error_abort);
        }
    }
    g_slist_free(list);

    return size;
}

static void virtio_balloon_set_config(VirtIODevice *vdev,
                                      const uint8_t *config_data)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    struct virtio_balloon_config config;
    uint32_t oldactual = dev->actual;
    ram_addr_t vm_ram_size = get_current_ram_size();

    memcpy(&config, config_data, virtio_balloon_config_size(dev));
    dev->actual = le32_to_cpu(config.actual);
    if (dev->actual != oldactual) {
        qapi_event_send_balloon_change(vm_ram_size -
                        ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
    }
    trace_virtio_balloon_set_config(dev->actual, oldactual);
}

static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
                                            Error **errp)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    f |= dev->host_features;
    virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);

    return f;
}

static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
    VirtIOBalloon *dev = opaque;
    info->actual = get_current_ram_size() - ((uint64_t) dev->actual <<
                                             VIRTIO_BALLOON_PFN_SHIFT);
}

static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    ram_addr_t vm_ram_size = get_current_ram_size();

    if (target > vm_ram_size) {
        target = vm_ram_size;
    }
    if (target) {
        dev->num_pages = (vm_ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
        virtio_notify_config(vdev);
    }
    trace_virtio_balloon_to_target(target, dev->num_pages);
}

static int virtio_balloon_post_load_device(void *opaque, int version_id)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(opaque);

    if (balloon_stats_enabled(s)) {
        balloon_stats_change_timer(s, s->stats_poll_interval);
    }
    return 0;
}

static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
    .name = "virtio-balloon-device/free-page-report",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_balloon_free_page_support,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_balloon_device = {
    .name = "virtio-balloon-device",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = virtio_balloon_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(num_pages, VirtIOBalloon),
        VMSTATE_UINT32(actual, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_balloon_free_page_report,
        NULL
    }
};

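/*
 * Realize: register the balloon handler (only one balloon device is allowed),
 * create the inflate/deflate/stats queues and, when the free-page-hint
 * feature is enabled, the free page vq plus its iothread bottom half and
 * migration notifier.
 */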
static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBalloon *s = VIRTIO_BALLOON(dev);
    int ret;

    virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
                virtio_balloon_config_size(s));

    ret = qemu_add_balloon_handler(virtio_balloon_to_target,
                                   virtio_balloon_stat, s);

    if (ret < 0) {
        error_setg(errp, "Only one balloon device is supported");
        virtio_cleanup(vdev);
        return;
    }

    s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);

    if (virtio_has_feature(s->host_features,
                           VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
                                           virtio_balloon_handle_free_page_vq);
        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
        s->free_page_report_cmd_id =
                           VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
        s->free_page_report_notify.notify =
                                       virtio_balloon_free_page_report_notify;
        precopy_add_notifier(&s->free_page_report_notify);
        if (s->iothread) {
            object_ref(OBJECT(s->iothread));
            s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
                                       virtio_ballloon_get_free_page_hints, s);
            qemu_mutex_init(&s->free_page_lock);
            qemu_cond_init(&s->free_page_cond);
            s->block_iothread = false;
        } else {
            /* Simply disable this feature if the iothread wasn't created. */
            s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
            virtio_error(vdev, "iothread is missing");
        }
    }
    reset_stats(s);
}

static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBalloon *s = VIRTIO_BALLOON(dev);

    if (virtio_balloon_free_page_support(s)) {
        qemu_bh_delete(s->free_page_bh);
        virtio_balloon_free_page_stop(s);
        precopy_remove_notifier(&s->free_page_report_notify);
    }
    balloon_stats_destroy_timer(s);
    qemu_remove_balloon_handler(s);
    virtio_cleanup(vdev);
}

static void virtio_balloon_device_reset(VirtIODevice *vdev)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);

    if (virtio_balloon_free_page_support(s)) {
        virtio_balloon_free_page_stop(s);
    }

    if (s->stats_vq_elem != NULL) {
        virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
        g_free(s->stats_vq_elem);
        s->stats_vq_elem = NULL;
    }
}

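/*
 * On a status change: when the driver becomes ready while the VM is running,
 * rewind the stats queue to recover the element discarded when the VM was
 * stopped; also block or unblock the free page hinting iothread to match the
 * VM run state.
 */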
static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);

    if (!s->stats_vq_elem && vdev->vm_running &&
        (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) {
        /* poll stats queue for the element we have discarded when the VM
         * was stopped */
        virtio_balloon_receive_stats(vdev, s->svq);
    }

    if (virtio_balloon_free_page_support(s)) {
        /*
         * The VM is woken up and the iothread was blocked, so signal it to
         * continue.
         */
        if (vdev->vm_running && s->block_iothread) {
            qemu_mutex_lock(&s->free_page_lock);
            s->block_iothread = false;
            qemu_cond_signal(&s->free_page_cond);
            qemu_mutex_unlock(&s->free_page_lock);
        }

        /* The VM is stopped, block the iothread. */
        if (!vdev->vm_running) {
            qemu_mutex_lock(&s->free_page_lock);
            s->block_iothread = true;
            qemu_mutex_unlock(&s->free_page_lock);
        }
    }
}

static void virtio_balloon_instance_init(Object *obj)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(obj);

    object_property_add(obj, "guest-stats", "guest statistics",
                        balloon_stats_get_all, NULL, NULL, s, NULL);

    object_property_add(obj, "guest-stats-polling-interval", "int",
                        balloon_stats_get_poll_interval,
                        balloon_stats_set_poll_interval,
                        NULL, s, NULL);
}

static const VMStateDescription vmstate_virtio_balloon = {
    .name = "virtio-balloon",
    .minimum_version_id = 1,
    .version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static Property virtio_balloon_properties[] = {
    DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
                    VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
    /* QEMU 4.0 accidentally changed the config size even when free-page-hint
     * is disabled, resulting in QEMU 3.1 migration incompatibility.  This
     * property retains this quirk for QEMU 4.1 machine types.
     */
    DEFINE_PROP_BOOL("qemu-4-0-config-size", VirtIOBalloon,
                     qemu_4_0_config_size, false),
    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
                     IOThread *),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_balloon_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_balloon_properties;
    dc->vmsd = &vmstate_virtio_balloon;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_balloon_device_realize;
    vdc->unrealize = virtio_balloon_device_unrealize;
    vdc->reset = virtio_balloon_device_reset;
    vdc->get_config = virtio_balloon_get_config;
    vdc->set_config = virtio_balloon_set_config;
    vdc->get_features = virtio_balloon_get_features;
    vdc->set_status = virtio_balloon_set_status;
    vdc->vmsd = &vmstate_virtio_balloon_device;
}

static const TypeInfo virtio_balloon_info = {
    .name = TYPE_VIRTIO_BALLOON,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOBalloon),
    .instance_init = virtio_balloon_instance_init,
    .class_init = virtio_balloon_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_balloon_info);
}

type_init(virtio_register_types)