LXR qemu/migration/postcopy-ram.c

   1/*
   2 * Postcopy migration for RAM
   3 *
   4 * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates
   5 *
   6 * Authors:
   7 *  Dave Gilbert  <dgilbert@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14/*
  15 * Postcopy is a migration technique where the execution flips from the
  16 * source to the destination before all the data has been copied.
  17 */
  18
  19#include "qemu/osdep.h"
  20#include <glib.h>
  21
  22#include "qemu-common.h"
  23#include "migration/migration.h"
  24#include "migration/postcopy-ram.h"
  25#include "sysemu/sysemu.h"
  26#include "sysemu/balloon.h"
  27#include "qemu/error-report.h"
  28#include "trace.h"
  29
  30/* Arbitrary limit on size of each discard command,
  31 * keeps them around ~200 bytes
  32 */
  33#define MAX_DISCARDS_PER_COMMAND 12
  34
  35struct PostcopyDiscardState {
  36    const char *ramblock_name;
  37    uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
  38    uint16_t cur_entry;
  39    /*
  40     * Start and length of a discard range (bytes)
  41     */
  42    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
  43    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
  44    unsigned int nsentwords;
  45    unsigned int nsentcmds;
  46};
  47
/* Postcopy needs to detect accesses to pages that haven't yet been copied
 * across, and to efficiently map new pages in; the techniques for doing
 * this are target OS specific.
 */
  52#if defined(__linux__)
  53
  54#include <poll.h>
  55#include <sys/mman.h>
  56#include <sys/ioctl.h>
  57#include <sys/syscall.h>
  58#include <asm/types.h> /* for __u64 */
  59#endif
  60
  61#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
  62#include <sys/eventfd.h>
  63#include <linux/userfaultfd.h>
  64
  65static bool ufd_version_check(int ufd)
  66{
  67    struct uffdio_api api_struct;
  68    uint64_t ioctl_mask;
  69
  70    api_struct.api = UFFD_API;
  71    api_struct.features = 0;
  72    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
  73        error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
  74                     strerror(errno));
  75        return false;
  76    }
  77
  78    ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
  79                 (__u64)1 << _UFFDIO_UNREGISTER;
  80    if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
  81        error_report("Missing userfault features: %" PRIx64,
  82                     (uint64_t)(~api_struct.ioctls & ioctl_mask));
  83        return false;
  84    }
  85
  86    return true;
  87}
  88
  89/*
  90 * Note: This has the side effect of munlock'ing all of RAM, that's
  91 * normally fine since if the postcopy succeeds it gets turned back on at the
  92 * end.
  93 */
  94bool postcopy_ram_supported_by_host(void)
  95{
  96    long pagesize = getpagesize();
  97    int ufd = -1;
  98    bool ret = false; /* Error unless we change it */
  99    void *testarea = NULL;
 100    struct uffdio_register reg_struct;
 101    struct uffdio_range range_struct;
 102    uint64_t feature_mask;
 103
 104    if ((1ul << qemu_target_page_bits()) > pagesize) {
 105        error_report("Target page size bigger than host page size");
 106        goto out;
 107    }
 108
 109    ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
 110    if (ufd == -1) {
 111        error_report("%s: userfaultfd not available: %s", __func__,
 112                     strerror(errno));
 113        goto out;
 114    }
 115
 116    /* Version and features check */
 117    if (!ufd_version_check(ufd)) {
 118        goto out;
 119    }
 120
 121    /*
 122     * userfault and mlock don't go together; we'll put it back later if
 123     * it was enabled.
 124     */
 125    if (munlockall()) {
 126        error_report("%s: munlockall: %s", __func__,  strerror(errno));
 127        return -1;
 128    }
 129
 130    /*
 131     *  We need to check that the ops we need are supported on anon memory
 132     *  To do that we need to register a chunk and see the flags that
 133     *  are returned.
 134     */
 135    testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
 136                                    MAP_ANONYMOUS, -1, 0);
 137    if (testarea == MAP_FAILED) {
 138        error_report("%s: Failed to map test area: %s", __func__,
 139                     strerror(errno));
 140        goto out;
 141    }
 142    g_assert(((size_t)testarea & (pagesize-1)) == 0);
 143
 144    reg_struct.range.start = (uintptr_t)testarea;
 145    reg_struct.range.len = pagesize;
 146    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 147
 148    if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
 149        error_report("%s userfault register: %s", __func__, strerror(errno));
 150        goto out;
 151    }
 152
 153    range_struct.start = (uintptr_t)testarea;
 154    range_struct.len = pagesize;
 155    if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
 156        error_report("%s userfault unregister: %s", __func__, strerror(errno));
 157        goto out;
 158    }
 159
 160    feature_mask = (__u64)1 << _UFFDIO_WAKE |
 161                   (__u64)1 << _UFFDIO_COPY |
 162                   (__u64)1 << _UFFDIO_ZEROPAGE;
 163    if ((reg_struct.ioctls & feature_mask) != feature_mask) {
 164        error_report("Missing userfault map features: %" PRIx64,
 165                     (uint64_t)(~reg_struct.ioctls & feature_mask));
 166        goto out;
 167    }
 168
 169    /* Success! */
 170    ret = true;
 171out:
 172    if (testarea) {
 173        munmap(testarea, pagesize);
 174    }
 175    if (ufd != -1) {
 176        close(ufd);
 177    }
 178    return ret;
 179}
 180
 181/**
 182 * postcopy_ram_discard_range: Discard a range of memory.
 183 * We can assume that if we've been called postcopy_ram_hosttest returned true.
 184 *
 185 * @mis: Current incoming migration state.
 186 * @start, @length: range of memory to discard.
 187 *
 188 * returns: 0 on success.
 189 */
 190int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
 191                               size_t length)
 192{
 193    trace_postcopy_ram_discard_range(start, length);
 194    if (madvise(start, length, MADV_DONTNEED)) {
 195        error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
 196        return -1;
 197    }
 198
 199    return 0;
 200}
 201
 202/*
 203 * Setup an area of RAM so that it *can* be used for postcopy later; this
 204 * must be done right at the start prior to pre-copy.
 205 * opaque should be the MIS.
 206 */
 207static int init_range(const char *block_name, void *host_addr,
 208                      ram_addr_t offset, ram_addr_t length, void *opaque)
 209{
 210    MigrationIncomingState *mis = opaque;
 211
 212    trace_postcopy_init_range(block_name, host_addr, offset, length);
 213
 214    /*
 215     * We need the whole of RAM to be truly empty for postcopy, so things
 216     * like ROMs and any data tables built during init must be zero'd
 217     * - we're going to get the copy from the source anyway.
 218     * (Precopy will just overwrite this data, so doesn't need the discard)
 219     */
 220    if (postcopy_ram_discard_range(mis, host_addr, length)) {
 221        return -1;
 222    }
 223
 224    return 0;
 225}
 226
 227/*
 228 * At the end of migration, undo the effects of init_range
 229 * opaque should be the MIS.
 230 */
 231static int cleanup_range(const char *block_name, void *host_addr,
 232                        ram_addr_t offset, ram_addr_t length, void *opaque)
 233{
 234    MigrationIncomingState *mis = opaque;
 235    struct uffdio_range range_struct;
 236    trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
 237
 238    /*
 239     * We turned off hugepage for the precopy stage with postcopy enabled
 240     * we can turn it back on now.
 241     */
 242    qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE);
 243
 244    /*
 245     * We can also turn off userfault now since we should have all the
 246     * pages.   It can be useful to leave it on to debug postcopy
 247     * if you're not sure it's always getting every page.
 248     */
 249    range_struct.start = (uintptr_t)host_addr;
 250    range_struct.len = length;
 251
 252    if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
 253        error_report("%s: userfault unregister %s", __func__, strerror(errno));
 254
 255        return -1;
 256    }
 257
 258    return 0;
 259}
 260
 261/*
 262 * Initialise postcopy-ram, setting the RAM to a state where we can go into
 263 * postcopy later; must be called prior to any precopy.
 264 * called from arch_init's similarly named ram_postcopy_incoming_init
 265 */
 266int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 267{
 268    if (qemu_ram_foreach_block(init_range, mis)) {
 269        return -1;
 270    }
 271
 272    return 0;
 273}
 274
 275/*
 276 * At the end of a migration where postcopy_ram_incoming_init was called.
 277 */
 278int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 279{
 280    trace_postcopy_ram_incoming_cleanup_entry();
 281
 282    if (mis->have_fault_thread) {
 283        uint64_t tmp64;
 284
 285        if (qemu_ram_foreach_block(cleanup_range, mis)) {
 286            return -1;
 287        }
 288        /*
 289         * Tell the fault_thread to exit, it's an eventfd that should
 290         * currently be at 0, we're going to increment it to 1
 291         */
 292        tmp64 = 1;
 293        if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
 294            trace_postcopy_ram_incoming_cleanup_join();
 295            qemu_thread_join(&mis->fault_thread);
 296        } else {
 297            /* Not much we can do here, but may as well report it */
 298            error_report("%s: incrementing userfault_quit_fd: %s", __func__,
 299                         strerror(errno));
 300        }
 301        trace_postcopy_ram_incoming_cleanup_closeuf();
 302        close(mis->userfault_fd);
 303        close(mis->userfault_quit_fd);
 304        mis->have_fault_thread = false;
 305    }
 306
 307    qemu_balloon_inhibit(false);
 308
 309    if (enable_mlock) {
 310        if (os_mlock() < 0) {
 311            error_report("mlock: %s", strerror(errno));
 312            /*
 313             * It doesn't feel right to fail at this point, we have a valid
 314             * VM state.
 315             */
 316        }
 317    }
 318
 319    postcopy_state_set(POSTCOPY_INCOMING_END);
 320    migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
 321
 322    if (mis->postcopy_tmp_page) {
 323        munmap(mis->postcopy_tmp_page, getpagesize());
 324        mis->postcopy_tmp_page = NULL;
 325    }
 326    trace_postcopy_ram_incoming_cleanup_exit();
 327    return 0;
 328}
 329
 330/*
 331 * Disable huge pages on an area
 332 */
 333static int nhp_range(const char *block_name, void *host_addr,
 334                    ram_addr_t offset, ram_addr_t length, void *opaque)
 335{
 336    trace_postcopy_nhp_range(block_name, host_addr, offset, length);
 337
 338    /*
 339     * Before we do discards we need to ensure those discards really
 340     * do delete areas of the page, even if THP thinks a hugepage would
 341     * be a good idea, so force hugepages off.
 342     */
 343    qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);
 344
 345    return 0;
 346}
 347
 348/*
 349 * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
 350 * however leaving it until after precopy means that most of the precopy
 351 * data is still THPd
 352 */
 353int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 354{
 355    if (qemu_ram_foreach_block(nhp_range, mis)) {
 356        return -1;
 357    }
 358
 359    postcopy_state_set(POSTCOPY_INCOMING_DISCARD);
 360
 361    return 0;
 362}
 363
 364/*
 365 * Mark the given area of RAM as requiring notification to unwritten areas
 366 * Used as a  callback on qemu_ram_foreach_block.
 367 *   host_addr: Base of area to mark
 368 *   offset: Offset in the whole ram arena
 369 *   length: Length of the section
 370 *   opaque: MigrationIncomingState pointer
 371 * Returns 0 on success
 372 */
 373static int ram_block_enable_notify(const char *block_name, void *host_addr,
 374                                   ram_addr_t offset, ram_addr_t length,
 375                                   void *opaque)
 376{
 377    MigrationIncomingState *mis = opaque;
 378    struct uffdio_register reg_struct;
 379
 380    reg_struct.range.start = (uintptr_t)host_addr;
 381    reg_struct.range.len = length;
 382    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 383
 384    /* Now tell our userfault_fd that it's responsible for this area */
 385    if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
 386        error_report("%s userfault register: %s", __func__, strerror(errno));
 387        return -1;
 388    }
 389
 390    return 0;
 391}
 392
 393/*
 394 * Handle faults detected by the USERFAULT markings
 395 */
 396static void *postcopy_ram_fault_thread(void *opaque)
 397{
 398    MigrationIncomingState *mis = opaque;
 399    struct uffd_msg msg;
 400    int ret;
 401    size_t hostpagesize = getpagesize();
 402    RAMBlock *rb = NULL;
 403    RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 404
 405    trace_postcopy_ram_fault_thread_entry();
 406    qemu_sem_post(&mis->fault_thread_sem);
 407
 408    while (true) {
 409        ram_addr_t rb_offset;
 410        ram_addr_t in_raspace;
 411        struct pollfd pfd[2];
 412
 413        /*
 414         * We're mainly waiting for the kernel to give us a faulting HVA,
 415         * however we can be told to quit via userfault_quit_fd which is
 416         * an eventfd
 417         */
 418        pfd[0].fd = mis->userfault_fd;
 419        pfd[0].events = POLLIN;
 420        pfd[0].revents = 0;
 421        pfd[1].fd = mis->userfault_quit_fd;
 422        pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
 423        pfd[1].revents = 0;
 424
 425        if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
 426            error_report("%s: userfault poll: %s", __func__, strerror(errno));
 427            break;
 428        }
 429
 430        if (pfd[1].revents) {
 431            trace_postcopy_ram_fault_thread_quit();
 432            break;
 433        }
 434
 435        ret = read(mis->userfault_fd, &msg, sizeof(msg));
 436        if (ret != sizeof(msg)) {
 437            if (errno == EAGAIN) {
 438                /*
 439                 * if a wake up happens on the other thread just after
 440                 * the poll, there is nothing to read.
 441                 */
 442                continue;
 443            }
 444            if (ret < 0) {
 445                error_report("%s: Failed to read full userfault message: %s",
 446                             __func__, strerror(errno));
 447                break;
 448            } else {
 449                error_report("%s: Read %d bytes from userfaultfd expected %zd",
 450                             __func__, ret, sizeof(msg));
 451                break; /* Lost alignment, don't know what we'd read next */
 452            }
 453        }
 454        if (msg.event != UFFD_EVENT_PAGEFAULT) {
 455            error_report("%s: Read unexpected event %ud from userfaultfd",
 456                         __func__, msg.event);
 457            continue; /* It's not a page fault, shouldn't happen */
 458        }
 459
 460        rb = qemu_ram_block_from_host(
 461                 (void *)(uintptr_t)msg.arg.pagefault.address,
 462                 true, &in_raspace, &rb_offset);
 463        if (!rb) {
 464            error_report("postcopy_ram_fault_thread: Fault outside guest: %"
 465                         PRIx64, (uint64_t)msg.arg.pagefault.address);
 466            break;
 467        }
 468
 469        rb_offset &= ~(hostpagesize - 1);
 470        trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
 471                                                qemu_ram_get_idstr(rb),
 472                                                rb_offset);
 473
 474        /*
 475         * Send the request to the source - we want to request one
 476         * of our host page sizes (which is >= TPS)
 477         */
 478        if (rb != last_rb) {
 479            last_rb = rb;
 480            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
 481                                     rb_offset, hostpagesize);
 482        } else {
 483            /* Save some space */
 484            migrate_send_rp_req_pages(mis, NULL,
 485                                     rb_offset, hostpagesize);
 486        }
 487    }
 488    trace_postcopy_ram_fault_thread_exit();
 489    return NULL;
 490}
 491
 492int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 493{
 494    /* Open the fd for the kernel to give us userfaults */
 495    mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 496    if (mis->userfault_fd == -1) {
 497        error_report("%s: Failed to open userfault fd: %s", __func__,
 498                     strerror(errno));
 499        return -1;
 500    }
 501
 502    /*
 503     * Although the host check already tested the API, we need to
 504     * do the check again as an ABI handshake on the new fd.
 505     */
 506    if (!ufd_version_check(mis->userfault_fd)) {
 507        return -1;
 508    }
 509
 510    /* Now an eventfd we use to tell the fault-thread to quit */
 511    mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
 512    if (mis->userfault_quit_fd == -1) {
 513        error_report("%s: Opening userfault_quit_fd: %s", __func__,
 514                     strerror(errno));
 515        close(mis->userfault_fd);
 516        return -1;
 517    }
 518
 519    qemu_sem_init(&mis->fault_thread_sem, 0);
 520    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
 521                       postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
 522    qemu_sem_wait(&mis->fault_thread_sem);
 523    qemu_sem_destroy(&mis->fault_thread_sem);
 524    mis->have_fault_thread = true;
 525
 526    /* Mark so that we get notified of accesses to unwritten areas */
 527    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
 528        return -1;
 529    }
 530
 531    /*
 532     * Ballooning can mark pages as absent while we're postcopying
 533     * that would cause false userfaults.
 534     */
 535    qemu_balloon_inhibit(true);
 536
 537    trace_postcopy_ram_enable_notify();
 538
 539    return 0;
 540}
 541
 542/*
 543 * Place a host page (from) at (host) atomically
 544 * returns 0 on success
 545 */
 546int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
 547{
 548    struct uffdio_copy copy_struct;
 549
 550    copy_struct.dst = (uint64_t)(uintptr_t)host;
 551    copy_struct.src = (uint64_t)(uintptr_t)from;
 552    copy_struct.len = getpagesize();
 553    copy_struct.mode = 0;
 554
 555    /* copy also acks to the kernel waking the stalled thread up
 556     * TODO: We can inhibit that ack and only do it if it was requested
 557     * which would be slightly cheaper, but we'd have to be careful
 558     * of the order of updating our page state.
 559     */
 560    if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
 561        int e = errno;
 562        error_report("%s: %s copy host: %p from: %p",
 563                     __func__, strerror(e), host, from);
 564
 565        return -e;
 566    }
 567
 568    trace_postcopy_place_page(host);
 569    return 0;
 570}
 571
 572/*
 573 * Place a zero page at (host) atomically
 574 * returns 0 on success
 575 */
 576int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
 577{
 578    struct uffdio_zeropage zero_struct;
 579
 580    zero_struct.range.start = (uint64_t)(uintptr_t)host;
 581    zero_struct.range.len = getpagesize();
 582    zero_struct.mode = 0;
 583
 584    if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
 585        int e = errno;
 586        error_report("%s: %s zero host: %p",
 587                     __func__, strerror(e), host);
 588
 589        return -e;
 590    }
 591
 592    trace_postcopy_place_page_zero(host);
 593    return 0;
 594}
 595
 596/*
 597 * Returns a target page of memory that can be mapped at a later point in time
 598 * using postcopy_place_page
 599 * The same address is used repeatedly, postcopy_place_page just takes the
 600 * backing page away.
 601 * Returns: Pointer to allocated page
 602 *
 603 */
 604void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 605{
 606    if (!mis->postcopy_tmp_page) {
 607        mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
 608                             PROT_READ | PROT_WRITE, MAP_PRIVATE |
 609                             MAP_ANONYMOUS, -1, 0);
 610        if (!mis->postcopy_tmp_page) {
 611            error_report("%s: %s", __func__, strerror(errno));
 612            return NULL;
 613        }
 614    }
 615
 616    return mis->postcopy_tmp_page;
 617}
 618
 619#else
 620/* No target OS support, stubs just fail */
 621bool postcopy_ram_supported_by_host(void)
 622{
 623    error_report("%s: No OS support", __func__);
 624    return false;
 625}
 626
 627int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 628{
 629    error_report("postcopy_ram_incoming_init: No OS support");
 630    return -1;
 631}
 632
 633int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 634{
 635    assert(0);
 636    return -1;
 637}
 638
 639int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
 640                               size_t length)
 641{
 642    assert(0);
 643    return -1;
 644}
 645
 646int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 647{
 648    assert(0);
 649    return -1;
 650}
 651
 652int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 653{
 654    assert(0);
 655    return -1;
 656}
 657
 658int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
 659{
 660    assert(0);
 661    return -1;
 662}
 663
 664int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
 665{
 666    assert(0);
 667    return -1;
 668}
 669
 670void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 671{
 672    assert(0);
 673    return NULL;
 674}
 675
 676#endif
 677
 678/* ------------------------------------------------------------------------- */
 679
 680/**
 681 * postcopy_discard_send_init: Called at the start of each RAMBlock before
 682 *   asking to discard individual ranges.
 683 *
 684 * @ms: The current migration state.
 685 * @offset: the bitmap offset of the named RAMBlock in the migration
 686 *   bitmap.
 687 * @name: RAMBlock that discards will operate on.
 688 *
 689 * returns: a new PDS.
 690 */
 691PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
 692                                                 unsigned long offset,
 693                                                 const char *name)
 694{
 695    PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
 696
 697    if (res) {
 698        res->ramblock_name = name;
 699        res->offset = offset;
 700    }
 701
 702    return res;
 703}
 704
 705/**
 706 * postcopy_discard_send_range: Called by the bitmap code for each chunk to
 707 *   discard. May send a discard message, may just leave it queued to
 708 *   be sent later.
 709 *
 710 * @ms: Current migration state.
 711 * @pds: Structure initialised by postcopy_discard_send_init().
 712 * @start,@length: a range of pages in the migration bitmap in the
 713 *   RAM block passed to postcopy_discard_send_init() (length=1 is one page)
 714 */
 715void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
 716                                unsigned long start, unsigned long length)
 717{
 718    size_t tp_bits = qemu_target_page_bits();
 719    /* Convert to byte offsets within the RAM block */
 720    pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
 721    pds->length_list[pds->cur_entry] = length << tp_bits;
 722    trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
 723    pds->cur_entry++;
 724    pds->nsentwords++;
 725
 726    if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
 727        /* Full set, ship it! */
 728        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 729                                              pds->ramblock_name,
 730                                              pds->cur_entry,
 731                                              pds->start_list,
 732                                              pds->length_list);
 733        pds->nsentcmds++;
 734        pds->cur_entry = 0;
 735    }
 736}
 737
 738/**
 739 * postcopy_discard_send_finish: Called at the end of each RAMBlock by the
 740 * bitmap code. Sends any outstanding discard messages, frees the PDS
 741 *
 742 * @ms: Current migration state.
 743 * @pds: Structure initialised by postcopy_discard_send_init().
 744 */
 745void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
 746{
 747    /* Anything unsent? */
 748    if (pds->cur_entry) {
 749        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 750                                              pds->ramblock_name,
 751                                              pds->cur_entry,
 752                                              pds->start_list,
 753                                              pds->length_list);
 754        pds->nsentcmds++;
 755    }
 756
 757    trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
 758                                       pds->nsentcmds);
 759
 760    g_free(pds);
 761}
 762