LXR qemu/migration/postcopy-ram.c

   1/*
   2 * Postcopy migration for RAM
   3 *
   4 * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates
   5 *
   6 * Authors:
   7 *  Dave Gilbert  <dgilbert@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14/*
  15 * Postcopy is a migration technique where the execution flips from the
  16 * source to the destination before all the data has been copied.
  17 */
  18
  19#include "qemu/osdep.h"
  20#include "exec/target_page.h"
  21#include "migration.h"
  22#include "qemu-file.h"
  23#include "savevm.h"
  24#include "postcopy-ram.h"
  25#include "ram.h"
  26#include "sysemu/sysemu.h"
  27#include "sysemu/balloon.h"
  28#include "qemu/error-report.h"
  29#include "trace.h"
  30
  31/* Arbitrary limit on size of each discard command,
  32 * keeps them around ~200 bytes
  33 */
  34#define MAX_DISCARDS_PER_COMMAND 12
  35
  36struct PostcopyDiscardState {
  37    const char *ramblock_name;
  38    uint16_t cur_entry;
  39    /*
  40     * Start and length of a discard range (bytes)
  41     */
  42    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
  43    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
  44    unsigned int nsentwords;
  45    unsigned int nsentcmds;
  46};
  47
  48/* Postcopy needs to detect accesses to pages that haven't yet been copied
  49 * across, and efficiently map new pages in, the techniques for doing this
  50 * are target OS specific.
  51 */
  52#if defined(__linux__)
  53
  54#include <poll.h>
  55#include <sys/ioctl.h>
  56#include <sys/syscall.h>
  57#include <asm/types.h> /* for __u64 */
  58#endif
  59
  60#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
  61#include <sys/eventfd.h>
  62#include <linux/userfaultfd.h>
  63
  64static bool ufd_version_check(int ufd)
  65{
  66    struct uffdio_api api_struct;
  67    uint64_t ioctl_mask;
  68
  69    api_struct.api = UFFD_API;
  70    api_struct.features = 0;
  71    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
  72        error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
  73                     strerror(errno));
  74        return false;
  75    }
  76
  77    ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
  78                 (__u64)1 << _UFFDIO_UNREGISTER;
  79    if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
  80        error_report("Missing userfault features: %" PRIx64,
  81                     (uint64_t)(~api_struct.ioctls & ioctl_mask));
  82        return false;
  83    }
  84
  85    if (getpagesize() != ram_pagesize_summary()) {
  86        bool have_hp = false;
  87        /* We've got a huge page */
  88#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
  89        have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
  90#endif
  91        if (!have_hp) {
  92            error_report("Userfault on this host does not support huge pages");
  93            return false;
  94        }
  95    }
  96    return true;
  97}
  98
  99/* Callback from postcopy_ram_supported_by_host block iterator.
 100 */
 101static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
 102                             ram_addr_t offset, ram_addr_t length, void *opaque)
 103{
 104    RAMBlock *rb = qemu_ram_block_by_name(block_name);
 105    size_t pagesize = qemu_ram_pagesize(rb);
 106
 107    if (qemu_ram_is_shared(rb)) {
 108        error_report("Postcopy on shared RAM (%s) is not yet supported",
 109                     block_name);
 110        return 1;
 111    }
 112
 113    if (length % pagesize) {
 114        error_report("Postcopy requires RAM blocks to be a page size multiple,"
 115                     " block %s is 0x" RAM_ADDR_FMT " bytes with a "
 116                     "page size of 0x%zx", block_name, length, pagesize);
 117        return 1;
 118    }
 119    return 0;
 120}
 121
 122/*
 123 * Note: This has the side effect of munlock'ing all of RAM, that's
 124 * normally fine since if the postcopy succeeds it gets turned back on at the
 125 * end.
 126 */
 127bool postcopy_ram_supported_by_host(void)
 128{
 129    long pagesize = getpagesize();
 130    int ufd = -1;
 131    bool ret = false; /* Error unless we change it */
 132    void *testarea = NULL;
 133    struct uffdio_register reg_struct;
 134    struct uffdio_range range_struct;
 135    uint64_t feature_mask;
 136
 137    if (qemu_target_page_size() > pagesize) {
 138        error_report("Target page size bigger than host page size");
 139        goto out;
 140    }
 141
 142    ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
 143    if (ufd == -1) {
 144        error_report("%s: userfaultfd not available: %s", __func__,
 145                     strerror(errno));
 146        goto out;
 147    }
 148
 149    /* Version and features check */
 150    if (!ufd_version_check(ufd)) {
 151        goto out;
 152    }
 153
 154    /* We don't support postcopy with shared RAM yet */
 155    if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
 156        goto out;
 157    }
 158
 159    /*
 160     * userfault and mlock don't go together; we'll put it back later if
 161     * it was enabled.
 162     */
 163    if (munlockall()) {
 164        error_report("%s: munlockall: %s", __func__,  strerror(errno));
 165        return -1;
 166    }
 167
 168    /*
 169     *  We need to check that the ops we need are supported on anon memory
 170     *  To do that we need to register a chunk and see the flags that
 171     *  are returned.
 172     */
 173    testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
 174                                    MAP_ANONYMOUS, -1, 0);
 175    if (testarea == MAP_FAILED) {
 176        error_report("%s: Failed to map test area: %s", __func__,
 177                     strerror(errno));
 178        goto out;
 179    }
 180    g_assert(((size_t)testarea & (pagesize-1)) == 0);
 181
 182    reg_struct.range.start = (uintptr_t)testarea;
 183    reg_struct.range.len = pagesize;
 184    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 185
 186    if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
 187        error_report("%s userfault register: %s", __func__, strerror(errno));
 188        goto out;
 189    }
 190
 191    range_struct.start = (uintptr_t)testarea;
 192    range_struct.len = pagesize;
 193    if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
 194        error_report("%s userfault unregister: %s", __func__, strerror(errno));
 195        goto out;
 196    }
 197
 198    feature_mask = (__u64)1 << _UFFDIO_WAKE |
 199                   (__u64)1 << _UFFDIO_COPY |
 200                   (__u64)1 << _UFFDIO_ZEROPAGE;
 201    if ((reg_struct.ioctls & feature_mask) != feature_mask) {
 202        error_report("Missing userfault map features: %" PRIx64,
 203                     (uint64_t)(~reg_struct.ioctls & feature_mask));
 204        goto out;
 205    }
 206
 207    /* Success! */
 208    ret = true;
 209out:
 210    if (testarea) {
 211        munmap(testarea, pagesize);
 212    }
 213    if (ufd != -1) {
 214        close(ufd);
 215    }
 216    return ret;
 217}
 218
 219/*
 220 * Setup an area of RAM so that it *can* be used for postcopy later; this
 221 * must be done right at the start prior to pre-copy.
 222 * opaque should be the MIS.
 223 */
 224static int init_range(const char *block_name, void *host_addr,
 225                      ram_addr_t offset, ram_addr_t length, void *opaque)
 226{
 227    trace_postcopy_init_range(block_name, host_addr, offset, length);
 228
 229    /*
 230     * We need the whole of RAM to be truly empty for postcopy, so things
 231     * like ROMs and any data tables built during init must be zero'd
 232     * - we're going to get the copy from the source anyway.
 233     * (Precopy will just overwrite this data, so doesn't need the discard)
 234     */
 235    if (ram_discard_range(block_name, 0, length)) {
 236        return -1;
 237    }
 238
 239    return 0;
 240}
 241
 242/*
 243 * At the end of migration, undo the effects of init_range
 244 * opaque should be the MIS.
 245 */
 246static int cleanup_range(const char *block_name, void *host_addr,
 247                        ram_addr_t offset, ram_addr_t length, void *opaque)
 248{
 249    MigrationIncomingState *mis = opaque;
 250    struct uffdio_range range_struct;
 251    trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
 252
 253    /*
 254     * We turned off hugepage for the precopy stage with postcopy enabled
 255     * we can turn it back on now.
 256     */
 257    qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE);
 258
 259    /*
 260     * We can also turn off userfault now since we should have all the
 261     * pages.   It can be useful to leave it on to debug postcopy
 262     * if you're not sure it's always getting every page.
 263     */
 264    range_struct.start = (uintptr_t)host_addr;
 265    range_struct.len = length;
 266
 267    if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
 268        error_report("%s: userfault unregister %s", __func__, strerror(errno));
 269
 270        return -1;
 271    }
 272
 273    return 0;
 274}
 275
 276/*
 277 * Initialise postcopy-ram, setting the RAM to a state where we can go into
 278 * postcopy later; must be called prior to any precopy.
 279 * called from arch_init's similarly named ram_postcopy_incoming_init
 280 */
 281int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 282{
 283    if (qemu_ram_foreach_block(init_range, NULL)) {
 284        return -1;
 285    }
 286
 287    return 0;
 288}
 289
 290/*
 291 * At the end of a migration where postcopy_ram_incoming_init was called.
 292 */
 293int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 294{
 295    trace_postcopy_ram_incoming_cleanup_entry();
 296
 297    if (mis->have_fault_thread) {
 298        uint64_t tmp64;
 299
 300        if (qemu_ram_foreach_block(cleanup_range, mis)) {
 301            return -1;
 302        }
 303        /*
 304         * Tell the fault_thread to exit, it's an eventfd that should
 305         * currently be at 0, we're going to increment it to 1
 306         */
 307        tmp64 = 1;
 308        if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
 309            trace_postcopy_ram_incoming_cleanup_join();
 310            qemu_thread_join(&mis->fault_thread);
 311        } else {
 312            /* Not much we can do here, but may as well report it */
 313            error_report("%s: incrementing userfault_quit_fd: %s", __func__,
 314                         strerror(errno));
 315        }
 316        trace_postcopy_ram_incoming_cleanup_closeuf();
 317        close(mis->userfault_fd);
 318        close(mis->userfault_quit_fd);
 319        mis->have_fault_thread = false;
 320    }
 321
 322    qemu_balloon_inhibit(false);
 323
 324    if (enable_mlock) {
 325        if (os_mlock() < 0) {
 326            error_report("mlock: %s", strerror(errno));
 327            /*
 328             * It doesn't feel right to fail at this point, we have a valid
 329             * VM state.
 330             */
 331        }
 332    }
 333
 334    postcopy_state_set(POSTCOPY_INCOMING_END);
 335
 336    if (mis->postcopy_tmp_page) {
 337        munmap(mis->postcopy_tmp_page, mis->largest_page_size);
 338        mis->postcopy_tmp_page = NULL;
 339    }
 340    if (mis->postcopy_tmp_zero_page) {
 341        munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
 342        mis->postcopy_tmp_zero_page = NULL;
 343    }
 344    trace_postcopy_ram_incoming_cleanup_exit();
 345    return 0;
 346}
 347
 348/*
 349 * Disable huge pages on an area
 350 */
 351static int nhp_range(const char *block_name, void *host_addr,
 352                    ram_addr_t offset, ram_addr_t length, void *opaque)
 353{
 354    trace_postcopy_nhp_range(block_name, host_addr, offset, length);
 355
 356    /*
 357     * Before we do discards we need to ensure those discards really
 358     * do delete areas of the page, even if THP thinks a hugepage would
 359     * be a good idea, so force hugepages off.
 360     */
 361    qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);
 362
 363    return 0;
 364}
 365
 366/*
 367 * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
 368 * however leaving it until after precopy means that most of the precopy
 369 * data is still THPd
 370 */
 371int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 372{
 373    if (qemu_ram_foreach_block(nhp_range, mis)) {
 374        return -1;
 375    }
 376
 377    postcopy_state_set(POSTCOPY_INCOMING_DISCARD);
 378
 379    return 0;
 380}
 381
 382/*
 383 * Mark the given area of RAM as requiring notification to unwritten areas
 384 * Used as a  callback on qemu_ram_foreach_block.
 385 *   host_addr: Base of area to mark
 386 *   offset: Offset in the whole ram arena
 387 *   length: Length of the section
 388 *   opaque: MigrationIncomingState pointer
 389 * Returns 0 on success
 390 */
 391static int ram_block_enable_notify(const char *block_name, void *host_addr,
 392                                   ram_addr_t offset, ram_addr_t length,
 393                                   void *opaque)
 394{
 395    MigrationIncomingState *mis = opaque;
 396    struct uffdio_register reg_struct;
 397
 398    reg_struct.range.start = (uintptr_t)host_addr;
 399    reg_struct.range.len = length;
 400    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 401
 402    /* Now tell our userfault_fd that it's responsible for this area */
 403    if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
 404        error_report("%s userfault register: %s", __func__, strerror(errno));
 405        return -1;
 406    }
 407    if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
 408        error_report("%s userfault: Region doesn't support COPY", __func__);
 409        return -1;
 410    }
 411
 412    return 0;
 413}
 414
 415/*
 416 * Handle faults detected by the USERFAULT markings
 417 */
 418static void *postcopy_ram_fault_thread(void *opaque)
 419{
 420    MigrationIncomingState *mis = opaque;
 421    struct uffd_msg msg;
 422    int ret;
 423    RAMBlock *rb = NULL;
 424    RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 425
 426    trace_postcopy_ram_fault_thread_entry();
 427    qemu_sem_post(&mis->fault_thread_sem);
 428
 429    while (true) {
 430        ram_addr_t rb_offset;
 431        struct pollfd pfd[2];
 432
 433        /*
 434         * We're mainly waiting for the kernel to give us a faulting HVA,
 435         * however we can be told to quit via userfault_quit_fd which is
 436         * an eventfd
 437         */
 438        pfd[0].fd = mis->userfault_fd;
 439        pfd[0].events = POLLIN;
 440        pfd[0].revents = 0;
 441        pfd[1].fd = mis->userfault_quit_fd;
 442        pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
 443        pfd[1].revents = 0;
 444
 445        if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
 446            error_report("%s: userfault poll: %s", __func__, strerror(errno));
 447            break;
 448        }
 449
 450        if (pfd[1].revents) {
 451            trace_postcopy_ram_fault_thread_quit();
 452            break;
 453        }
 454
 455        ret = read(mis->userfault_fd, &msg, sizeof(msg));
 456        if (ret != sizeof(msg)) {
 457            if (errno == EAGAIN) {
 458                /*
 459                 * if a wake up happens on the other thread just after
 460                 * the poll, there is nothing to read.
 461                 */
 462                continue;
 463            }
 464            if (ret < 0) {
 465                error_report("%s: Failed to read full userfault message: %s",
 466                             __func__, strerror(errno));
 467                break;
 468            } else {
 469                error_report("%s: Read %d bytes from userfaultfd expected %zd",
 470                             __func__, ret, sizeof(msg));
 471                break; /* Lost alignment, don't know what we'd read next */
 472            }
 473        }
 474        if (msg.event != UFFD_EVENT_PAGEFAULT) {
 475            error_report("%s: Read unexpected event %ud from userfaultfd",
 476                         __func__, msg.event);
 477            continue; /* It's not a page fault, shouldn't happen */
 478        }
 479
 480        rb = qemu_ram_block_from_host(
 481                 (void *)(uintptr_t)msg.arg.pagefault.address,
 482                 true, &rb_offset);
 483        if (!rb) {
 484            error_report("postcopy_ram_fault_thread: Fault outside guest: %"
 485                         PRIx64, (uint64_t)msg.arg.pagefault.address);
 486            break;
 487        }
 488
 489        rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
 490        trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
 491                                                qemu_ram_get_idstr(rb),
 492                                                rb_offset);
 493
 494        /*
 495         * Send the request to the source - we want to request one
 496         * of our host page sizes (which is >= TPS)
 497         */
 498        if (rb != last_rb) {
 499            last_rb = rb;
 500            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
 501                                     rb_offset, qemu_ram_pagesize(rb));
 502        } else {
 503            /* Save some space */
 504            migrate_send_rp_req_pages(mis, NULL,
 505                                     rb_offset, qemu_ram_pagesize(rb));
 506        }
 507    }
 508    trace_postcopy_ram_fault_thread_exit();
 509    return NULL;
 510}
 511
 512int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 513{
 514    /* Open the fd for the kernel to give us userfaults */
 515    mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 516    if (mis->userfault_fd == -1) {
 517        error_report("%s: Failed to open userfault fd: %s", __func__,
 518                     strerror(errno));
 519        return -1;
 520    }
 521
 522    /*
 523     * Although the host check already tested the API, we need to
 524     * do the check again as an ABI handshake on the new fd.
 525     */
 526    if (!ufd_version_check(mis->userfault_fd)) {
 527        return -1;
 528    }
 529
 530    /* Now an eventfd we use to tell the fault-thread to quit */
 531    mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
 532    if (mis->userfault_quit_fd == -1) {
 533        error_report("%s: Opening userfault_quit_fd: %s", __func__,
 534                     strerror(errno));
 535        close(mis->userfault_fd);
 536        return -1;
 537    }
 538
 539    qemu_sem_init(&mis->fault_thread_sem, 0);
 540    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
 541                       postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
 542    qemu_sem_wait(&mis->fault_thread_sem);
 543    qemu_sem_destroy(&mis->fault_thread_sem);
 544    mis->have_fault_thread = true;
 545
 546    /* Mark so that we get notified of accesses to unwritten areas */
 547    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
 548        return -1;
 549    }
 550
 551    /*
 552     * Ballooning can mark pages as absent while we're postcopying
 553     * that would cause false userfaults.
 554     */
 555    qemu_balloon_inhibit(true);
 556
 557    trace_postcopy_ram_enable_notify();
 558
 559    return 0;
 560}
 561
 562/*
 563 * Place a host page (from) at (host) atomically
 564 * returns 0 on success
 565 */
 566int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 567                        size_t pagesize)
 568{
 569    struct uffdio_copy copy_struct;
 570
 571    copy_struct.dst = (uint64_t)(uintptr_t)host;
 572    copy_struct.src = (uint64_t)(uintptr_t)from;
 573    copy_struct.len = pagesize;
 574    copy_struct.mode = 0;
 575
 576    /* copy also acks to the kernel waking the stalled thread up
 577     * TODO: We can inhibit that ack and only do it if it was requested
 578     * which would be slightly cheaper, but we'd have to be careful
 579     * of the order of updating our page state.
 580     */
 581    if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
 582        int e = errno;
 583        error_report("%s: %s copy host: %p from: %p (size: %zd)",
 584                     __func__, strerror(e), host, from, pagesize);
 585
 586        return -e;
 587    }
 588
 589    trace_postcopy_place_page(host);
 590    return 0;
 591}
 592
 593/*
 594 * Place a zero page at (host) atomically
 595 * returns 0 on success
 596 */
 597int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 598                             size_t pagesize)
 599{
 600    trace_postcopy_place_page_zero(host);
 601
 602    if (pagesize == getpagesize()) {
 603        struct uffdio_zeropage zero_struct;
 604        zero_struct.range.start = (uint64_t)(uintptr_t)host;
 605        zero_struct.range.len = getpagesize();
 606        zero_struct.mode = 0;
 607
 608        if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
 609            int e = errno;
 610            error_report("%s: %s zero host: %p",
 611                         __func__, strerror(e), host);
 612
 613            return -e;
 614        }
 615    } else {
 616        /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
 617        if (!mis->postcopy_tmp_zero_page) {
 618            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
 619                                               PROT_READ | PROT_WRITE,
 620                                               MAP_PRIVATE | MAP_ANONYMOUS,
 621                                               -1, 0);
 622            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
 623                int e = errno;
 624                mis->postcopy_tmp_zero_page = NULL;
 625                error_report("%s: %s mapping large zero page",
 626                             __func__, strerror(e));
 627                return -e;
 628            }
 629            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
 630        }
 631        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
 632                                   pagesize);
 633    }
 634
 635    return 0;
 636}
 637
 638/*
 639 * Returns a target page of memory that can be mapped at a later point in time
 640 * using postcopy_place_page
 641 * The same address is used repeatedly, postcopy_place_page just takes the
 642 * backing page away.
 643 * Returns: Pointer to allocated page
 644 *
 645 */
 646void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 647{
 648    if (!mis->postcopy_tmp_page) {
 649        mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
 650                             PROT_READ | PROT_WRITE, MAP_PRIVATE |
 651                             MAP_ANONYMOUS, -1, 0);
 652        if (mis->postcopy_tmp_page == MAP_FAILED) {
 653            mis->postcopy_tmp_page = NULL;
 654            error_report("%s: %s", __func__, strerror(errno));
 655            return NULL;
 656        }
 657    }
 658
 659    return mis->postcopy_tmp_page;
 660}
 661
 662#else
 663/* No target OS support, stubs just fail */
 664bool postcopy_ram_supported_by_host(void)
 665{
 666    error_report("%s: No OS support", __func__);
 667    return false;
 668}
 669
 670int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 671{
 672    error_report("postcopy_ram_incoming_init: No OS support");
 673    return -1;
 674}
 675
 676int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 677{
 678    assert(0);
 679    return -1;
 680}
 681
 682int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 683{
 684    assert(0);
 685    return -1;
 686}
 687
 688int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 689{
 690    assert(0);
 691    return -1;
 692}
 693
 694int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 695                        size_t pagesize)
 696{
 697    assert(0);
 698    return -1;
 699}
 700
 701int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 702                        size_t pagesize)
 703{
 704    assert(0);
 705    return -1;
 706}
 707
 708void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 709{
 710    assert(0);
 711    return NULL;
 712}
 713
 714#endif
 715
 716/* ------------------------------------------------------------------------- */
 717
 718/**
 719 * postcopy_discard_send_init: Called at the start of each RAMBlock before
 720 *   asking to discard individual ranges.
 721 *
 722 * @ms: The current migration state.
 723 * @offset: the bitmap offset of the named RAMBlock in the migration
 724 *   bitmap.
 725 * @name: RAMBlock that discards will operate on.
 726 *
 727 * returns: a new PDS.
 728 */
 729PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
 730                                                 const char *name)
 731{
 732    PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
 733
 734    if (res) {
 735        res->ramblock_name = name;
 736    }
 737
 738    return res;
 739}
 740
 741/**
 742 * postcopy_discard_send_range: Called by the bitmap code for each chunk to
 743 *   discard. May send a discard message, may just leave it queued to
 744 *   be sent later.
 745 *
 746 * @ms: Current migration state.
 747 * @pds: Structure initialised by postcopy_discard_send_init().
 748 * @start,@length: a range of pages in the migration bitmap in the
 749 *   RAM block passed to postcopy_discard_send_init() (length=1 is one page)
 750 */
 751void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
 752                                unsigned long start, unsigned long length)
 753{
 754    size_t tp_size = qemu_target_page_size();
 755    /* Convert to byte offsets within the RAM block */
 756    pds->start_list[pds->cur_entry] = start  * tp_size;
 757    pds->length_list[pds->cur_entry] = length * tp_size;
 758    trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
 759    pds->cur_entry++;
 760    pds->nsentwords++;
 761
 762    if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
 763        /* Full set, ship it! */
 764        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 765                                              pds->ramblock_name,
 766                                              pds->cur_entry,
 767                                              pds->start_list,
 768                                              pds->length_list);
 769        pds->nsentcmds++;
 770        pds->cur_entry = 0;
 771    }
 772}
 773
 774/**
 775 * postcopy_discard_send_finish: Called at the end of each RAMBlock by the
 776 * bitmap code. Sends any outstanding discard messages, frees the PDS
 777 *
 778 * @ms: Current migration state.
 779 * @pds: Structure initialised by postcopy_discard_send_init().
 780 */
 781void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
 782{
 783    /* Anything unsent? */
 784    if (pds->cur_entry) {
 785        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 786                                              pds->ramblock_name,
 787                                              pds->cur_entry,
 788                                              pds->start_list,
 789                                              pds->length_list);
 790        pds->nsentcmds++;
 791    }
 792
 793    trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
 794                                       pds->nsentcmds);
 795
 796    g_free(pds);
 797}
 798
/*
 * Current state of incoming postcopy; note this is not part of
 * MigrationIncomingState since its state is used during cleanup
 * at the end as MIS is being freed.
 */
 804static PostcopyState incoming_postcopy_state;
 805
 806PostcopyState  postcopy_state_get(void)
 807{
 808    return atomic_mb_read(&incoming_postcopy_state);
 809}
 810
 811/* Set the state and return the old state */
 812PostcopyState postcopy_state_set(PostcopyState new_state)
 813{
 814    return atomic_xchg(&incoming_postcopy_state, new_state);
 815}
 816