1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include "qemu/osdep.h"
20
21#include "qemu-common.h"
22#include "migration/migration.h"
23#include "migration/postcopy-ram.h"
24#include "sysemu/sysemu.h"
25#include "sysemu/balloon.h"
26#include "qemu/error-report.h"
27#include "trace.h"
28
29
30
31
32#define MAX_DISCARDS_PER_COMMAND 12
33
/*
 * State used to batch up page-discard ranges for one RAMBlock before
 * sending them to the destination via
 * qemu_savevm_send_postcopy_ram_discard().
 */
struct PostcopyDiscardState {
    const char *ramblock_name;  /* Name of the RAMBlock being discarded */
    uint64_t offset;            /* Base offset of this block; subtracted from
                                 * each 'start' in postcopy_discard_send_range()
                                 * — presumably a bitmap offset, TODO confirm */
    uint16_t cur_entry;         /* Next free slot in start/length_list */
    /*
     * Pending discard ranges, already shifted to byte units
     * (see postcopy_discard_send_range()).
     */
    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
    unsigned int nsentwords;    /* Total number of ranges queued */
    unsigned int nsentcmds;     /* Number of discard commands actually sent */
};
46
47
48
49
50
51#if defined(__linux__)
52
53#include <poll.h>
54#include <sys/ioctl.h>
55#include <sys/syscall.h>
56#include <asm/types.h>
57#endif
58
59#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
60#include <sys/eventfd.h>
61#include <linux/userfaultfd.h>
62
63static bool ufd_version_check(int ufd)
64{
65 struct uffdio_api api_struct;
66 uint64_t ioctl_mask;
67
68 api_struct.api = UFFD_API;
69 api_struct.features = 0;
70 if (ioctl(ufd, UFFDIO_API, &api_struct)) {
71 error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
72 strerror(errno));
73 return false;
74 }
75
76 ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
77 (__u64)1 << _UFFDIO_UNREGISTER;
78 if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
79 error_report("Missing userfault features: %" PRIx64,
80 (uint64_t)(~api_struct.ioctls & ioctl_mask));
81 return false;
82 }
83
84 return true;
85}
86
87
88
89
90
91
92bool postcopy_ram_supported_by_host(void)
93{
94 long pagesize = getpagesize();
95 int ufd = -1;
96 bool ret = false;
97 void *testarea = NULL;
98 struct uffdio_register reg_struct;
99 struct uffdio_range range_struct;
100 uint64_t feature_mask;
101
102 if ((1ul << qemu_target_page_bits()) > pagesize) {
103 error_report("Target page size bigger than host page size");
104 goto out;
105 }
106
107 ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
108 if (ufd == -1) {
109 error_report("%s: userfaultfd not available: %s", __func__,
110 strerror(errno));
111 goto out;
112 }
113
114
115 if (!ufd_version_check(ufd)) {
116 goto out;
117 }
118
119
120
121
122
123 if (munlockall()) {
124 error_report("%s: munlockall: %s", __func__, strerror(errno));
125 return -1;
126 }
127
128
129
130
131
132
133 testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
134 MAP_ANONYMOUS, -1, 0);
135 if (testarea == MAP_FAILED) {
136 error_report("%s: Failed to map test area: %s", __func__,
137 strerror(errno));
138 goto out;
139 }
140 g_assert(((size_t)testarea & (pagesize-1)) == 0);
141
142 reg_struct.range.start = (uintptr_t)testarea;
143 reg_struct.range.len = pagesize;
144 reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
145
146 if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) {
147 error_report("%s userfault register: %s", __func__, strerror(errno));
148 goto out;
149 }
150
151 range_struct.start = (uintptr_t)testarea;
152 range_struct.len = pagesize;
153 if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
154 error_report("%s userfault unregister: %s", __func__, strerror(errno));
155 goto out;
156 }
157
158 feature_mask = (__u64)1 << _UFFDIO_WAKE |
159 (__u64)1 << _UFFDIO_COPY |
160 (__u64)1 << _UFFDIO_ZEROPAGE;
161 if ((reg_struct.ioctls & feature_mask) != feature_mask) {
162 error_report("Missing userfault map features: %" PRIx64,
163 (uint64_t)(~reg_struct.ioctls & feature_mask));
164 goto out;
165 }
166
167
168 ret = true;
169out:
170 if (testarea) {
171 munmap(testarea, pagesize);
172 }
173 if (ufd != -1) {
174 close(ufd);
175 }
176 return ret;
177}
178
179
180
181
182
183
184
185
186
187
188int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
189 size_t length)
190{
191 trace_postcopy_ram_discard_range(start, length);
192 if (madvise(start, length, MADV_DONTNEED)) {
193 error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
194 return -1;
195 }
196
197 return 0;
198}
199
200
201
202
203
204
205static int init_range(const char *block_name, void *host_addr,
206 ram_addr_t offset, ram_addr_t length, void *opaque)
207{
208 MigrationIncomingState *mis = opaque;
209
210 trace_postcopy_init_range(block_name, host_addr, offset, length);
211
212
213
214
215
216
217
218 if (postcopy_ram_discard_range(mis, host_addr, length)) {
219 return -1;
220 }
221
222 return 0;
223}
224
225
226
227
228
229static int cleanup_range(const char *block_name, void *host_addr,
230 ram_addr_t offset, ram_addr_t length, void *opaque)
231{
232 MigrationIncomingState *mis = opaque;
233 struct uffdio_range range_struct;
234 trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
235
236
237
238
239
240 qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE);
241
242
243
244
245
246
247 range_struct.start = (uintptr_t)host_addr;
248 range_struct.len = length;
249
250 if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
251 error_report("%s: userfault unregister %s", __func__, strerror(errno));
252
253 return -1;
254 }
255
256 return 0;
257}
258
259
260
261
262
263
264int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
265{
266 if (qemu_ram_foreach_block(init_range, mis)) {
267 return -1;
268 }
269
270 return 0;
271}
272
273
274
275
/*
 * Tear down incoming postcopy state: unregister RAM from userfault,
 * stop and join the fault thread, close the fds, optionally re-mlock,
 * notify the source we're done, and free the temp page.
 * Returns: 0 on success, -1 on failure.
 */
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
    trace_postcopy_ram_incoming_cleanup_entry();

    if (mis->have_fault_thread) {
        uint64_t tmp64;

        /* Must unregister before closing the fds below */
        if (qemu_ram_foreach_block(cleanup_range, mis)) {
            return -1;
        }

        /*
         * Poke the quit eventfd; the fault thread polls it and exits
         * its loop, at which point it is safe to join.
         */
        tmp64 = 1;
        if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
            trace_postcopy_ram_incoming_cleanup_join();
            qemu_thread_join(&mis->fault_thread);
        } else {
            /* Not much we can do here, but may as well report it */
            error_report("%s: incrementing userfault_quit_fd: %s", __func__,
                         strerror(errno));
        }
        trace_postcopy_ram_incoming_cleanup_closeuf();
        close(mis->userfault_fd);
        close(mis->userfault_quit_fd);
        mis->have_fault_thread = false;
    }

    /* Balloon can run again now that all of RAM is really present */
    qemu_balloon_inhibit(false);

    if (enable_mlock) {
        if (os_mlock() < 0) {
            error_report("mlock: %s", strerror(errno));
            /*
             * It doesn't feel right to fail at this point, we have a
             * valid VM state; mlock failure is reported but non-fatal.
             */
        }
    }

    postcopy_state_set(POSTCOPY_INCOMING_END);
    /* Tell the source we've shut down (and whether we saw an error) */
    migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);

    if (mis->postcopy_tmp_page) {
        munmap(mis->postcopy_tmp_page, getpagesize());
        mis->postcopy_tmp_page = NULL;
    }
    trace_postcopy_ram_incoming_cleanup_exit();
    return 0;
}
327
328
329
330
/*
 * qemu_ram_foreach_block() callback: advise the kernel not to use
 * transparent hugepages over the range — presumably so that page faults
 * and page placement happen at small-page granularity during postcopy
 * (TODO confirm against userfaultfd requirements).
 * Always returns 0 (qemu_madvise failure is ignored here).
 */
static int nhp_range(const char *block_name, void *host_addr,
                     ram_addr_t offset, ram_addr_t length, void *opaque)
{
    trace_postcopy_nhp_range(block_name, host_addr, offset, length);

    qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);

    return 0;
}
345
346
347
348
349
350
351int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
352{
353 if (qemu_ram_foreach_block(nhp_range, mis)) {
354 return -1;
355 }
356
357 postcopy_state_set(POSTCOPY_INCOMING_DISCARD);
358
359 return 0;
360}
361
362
363
364
365
366
367
368
369
370
371static int ram_block_enable_notify(const char *block_name, void *host_addr,
372 ram_addr_t offset, ram_addr_t length,
373 void *opaque)
374{
375 MigrationIncomingState *mis = opaque;
376 struct uffdio_register reg_struct;
377
378 reg_struct.range.start = (uintptr_t)host_addr;
379 reg_struct.range.len = length;
380 reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
381
382
383 if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, ®_struct)) {
384 error_report("%s userfault register: %s", __func__, strerror(errno));
385 return -1;
386 }
387
388 return 0;
389}
390
391
392
393
/*
 * Thread which sits on the userfault fd: for each missing-page fault the
 * kernel reports, it asks the source (via the return path) to send that
 * host page.  Exits when the quit eventfd is poked.
 */
static void *postcopy_ram_fault_thread(void *opaque)
{
    MigrationIncomingState *mis = opaque;
    struct uffd_msg msg;
    int ret;
    size_t hostpagesize = getpagesize();
    RAMBlock *rb = NULL;
    RAMBlock *last_rb = NULL; /* last RAMBlock we requested a page from */

    trace_postcopy_ram_fault_thread_entry();
    /* Unblock postcopy_ram_enable_notify(), which waits for this sem */
    qemu_sem_post(&mis->fault_thread_sem);

    while (true) {
        ram_addr_t rb_offset;
        struct pollfd pfd[2];

        /*
         * Poll both the userfault fd (new faults) and the quit eventfd
         * (shutdown request from postcopy_ram_incoming_cleanup()).
         */
        pfd[0].fd = mis->userfault_fd;
        pfd[0].events = POLLIN;
        pfd[0].revents = 0;
        pfd[1].fd = mis->userfault_quit_fd;
        pfd[1].events = POLLIN;
        pfd[1].revents = 0;

        if (poll(pfd, 2, -1 /* wait forever */) == -1) {
            error_report("%s: userfault poll: %s", __func__, strerror(errno));
            break;
        }

        if (pfd[1].revents) {
            /* The eventfd was poked: time to quit */
            trace_postcopy_ram_fault_thread_quit();
            break;
        }

        ret = read(mis->userfault_fd, &msg, sizeof(msg));
        if (ret != sizeof(msg)) {
            if (errno == EAGAIN) {
                /*
                 * The fd is non-blocking; nothing to read right now,
                 * go back to polling.
                 */
                continue;
            }
            if (ret < 0) {
                error_report("%s: Failed to read full userfault message: %s",
                             __func__, strerror(errno));
                break;
            } else {
                /* Short read: we've lost message alignment, give up */
                error_report("%s: Read %d bytes from userfaultfd expected %zd",
                             __func__, ret, sizeof(msg));
                break;
            }
        }
        if (msg.event != UFFD_EVENT_PAGEFAULT) {
            /* Only pagefault events are expected from this fd */
            error_report("%s: Read unexpected event %ud from userfaultfd",
                         __func__, msg.event);
            continue;
        }

        /* Map the faulting host address back to a RAMBlock + offset */
        rb = qemu_ram_block_from_host(
                 (void *)(uintptr_t)msg.arg.pagefault.address,
                 true, &rb_offset);
        if (!rb) {
            error_report("postcopy_ram_fault_thread: Fault outside guest: %"
                         PRIx64, (uint64_t)msg.arg.pagefault.address);
            break;
        }

        /* Round down to a host-page boundary before requesting */
        rb_offset &= ~(hostpagesize - 1);
        trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                qemu_ram_get_idstr(rb),
                                                rb_offset);

        /*
         * Ask the source for the page; the block name only needs to be
         * sent when it differs from the previous request (wire-format
         * optimisation: NULL name means "same block as last time").
         */
        if (rb != last_rb) {
            last_rb = rb;
            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
                                     rb_offset, hostpagesize);
        } else {
            /* Save some space */
            migrate_send_rp_req_pages(mis, NULL,
                                     rb_offset, hostpagesize);
        }
    }
    trace_postcopy_ram_fault_thread_exit();
    return NULL;
}
488
489int postcopy_ram_enable_notify(MigrationIncomingState *mis)
490{
491
492 mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
493 if (mis->userfault_fd == -1) {
494 error_report("%s: Failed to open userfault fd: %s", __func__,
495 strerror(errno));
496 return -1;
497 }
498
499
500
501
502
503 if (!ufd_version_check(mis->userfault_fd)) {
504 return -1;
505 }
506
507
508 mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
509 if (mis->userfault_quit_fd == -1) {
510 error_report("%s: Opening userfault_quit_fd: %s", __func__,
511 strerror(errno));
512 close(mis->userfault_fd);
513 return -1;
514 }
515
516 qemu_sem_init(&mis->fault_thread_sem, 0);
517 qemu_thread_create(&mis->fault_thread, "postcopy/fault",
518 postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
519 qemu_sem_wait(&mis->fault_thread_sem);
520 qemu_sem_destroy(&mis->fault_thread_sem);
521 mis->have_fault_thread = true;
522
523
524 if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
525 return -1;
526 }
527
528
529
530
531
532 qemu_balloon_inhibit(true);
533
534 trace_postcopy_ram_enable_notify();
535
536 return 0;
537}
538
539
540
541
542
543int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
544{
545 struct uffdio_copy copy_struct;
546
547 copy_struct.dst = (uint64_t)(uintptr_t)host;
548 copy_struct.src = (uint64_t)(uintptr_t)from;
549 copy_struct.len = getpagesize();
550 copy_struct.mode = 0;
551
552
553
554
555
556
557 if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) {
558 int e = errno;
559 error_report("%s: %s copy host: %p from: %p",
560 __func__, strerror(e), host, from);
561
562 return -e;
563 }
564
565 trace_postcopy_place_page(host);
566 return 0;
567}
568
569
570
571
572
573int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
574{
575 struct uffdio_zeropage zero_struct;
576
577 zero_struct.range.start = (uint64_t)(uintptr_t)host;
578 zero_struct.range.len = getpagesize();
579 zero_struct.mode = 0;
580
581 if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
582 int e = errno;
583 error_report("%s: %s zero host: %p",
584 __func__, strerror(e), host);
585
586 return -e;
587 }
588
589 trace_postcopy_place_page_zero(host);
590 return 0;
591}
592
593
594
595
596
597
598
599
600
601void *postcopy_get_tmp_page(MigrationIncomingState *mis)
602{
603 if (!mis->postcopy_tmp_page) {
604 mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
605 PROT_READ | PROT_WRITE, MAP_PRIVATE |
606 MAP_ANONYMOUS, -1, 0);
607 if (mis->postcopy_tmp_page == MAP_FAILED) {
608 mis->postcopy_tmp_page = NULL;
609 error_report("%s: %s", __func__, strerror(errno));
610 return NULL;
611 }
612 }
613
614 return mis->postcopy_tmp_page;
615}
616
617#else
618
/* Fallback when userfaultfd is unavailable: postcopy can never work here */
bool postcopy_ram_supported_by_host(void)
{
    error_report("%s: No OS support", __func__);
    return false;
}
624
/* Fallback stub: reports the lack of OS support and fails */
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
{
    error_report("postcopy_ram_incoming_init: No OS support");
    return -1;
}
630
/* Unreachable: postcopy_ram_supported_by_host() returns false here */
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
636
/* Unreachable: postcopy_ram_supported_by_host() returns false here */
int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
                               size_t length)
{
    assert(0);
    return -1;
}
643
/* Unreachable: postcopy_ram_supported_by_host() returns false here */
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
649
/* Unreachable: postcopy_ram_supported_by_host() returns false here */
int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
655
/* Unreachable: postcopy_ram_supported_by_host() returns false here */
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
{
    assert(0);
    return -1;
}
661
/* Unreachable: postcopy_ram_supported_by_host() returns false here */
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
{
    assert(0);
    return -1;
}
667
/* Unreachable: postcopy_ram_supported_by_host() returns false here */
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
    assert(0);
    return NULL;
}
673
674#endif
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
690 unsigned long offset,
691 const char *name)
692{
693 PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
694
695 if (res) {
696 res->ramblock_name = name;
697 res->offset = offset;
698 }
699
700 return res;
701}
702
703
704
705
706
707
708
709
710
711
712
713void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
714 unsigned long start, unsigned long length)
715{
716 size_t tp_bits = qemu_target_page_bits();
717
718 pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
719 pds->length_list[pds->cur_entry] = length << tp_bits;
720 trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
721 pds->cur_entry++;
722 pds->nsentwords++;
723
724 if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
725
726 qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
727 pds->ramblock_name,
728 pds->cur_entry,
729 pds->start_list,
730 pds->length_list);
731 pds->nsentcmds++;
732 pds->cur_entry = 0;
733 }
734}
735
736
737
738
739
740
741
742
743void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
744{
745
746 if (pds->cur_entry) {
747 qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
748 pds->ramblock_name,
749 pds->cur_entry,
750 pds->start_list,
751 pds->length_list);
752 pds->nsentcmds++;
753 }
754
755 trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
756 pds->nsentcmds);
757
758 g_free(pds);
759}
760