qemu/migration/migration.c
<<
>>
Prefs
   1/*
   2 * QEMU live migration
   3 *
   4 * Copyright IBM, Corp. 2008
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 * Contributions after 2012-01-13 are licensed under the terms of the
  13 * GNU GPL, version 2 or (at your option) any later version.
  14 */
  15
  16#include "qemu/osdep.h"
  17#include "qemu/ctype.h"
  18#include "qemu/cutils.h"
  19#include "qemu/error-report.h"
  20#include "qemu/main-loop.h"
  21#include "migration/blocker.h"
  22#include "exec.h"
  23#include "fd.h"
  24#include "file.h"
  25#include "socket.h"
  26#include "system/runstate.h"
  27#include "system/system.h"
  28#include "system/cpu-throttle.h"
  29#include "rdma.h"
  30#include "ram.h"
  31#include "migration/cpr.h"
  32#include "migration/global_state.h"
  33#include "migration/misc.h"
  34#include "migration.h"
  35#include "migration-stats.h"
  36#include "savevm.h"
  37#include "qemu-file.h"
  38#include "channel.h"
  39#include "migration/vmstate.h"
  40#include "block/block.h"
  41#include "qapi/error.h"
  42#include "qapi/clone-visitor.h"
  43#include "qapi/qapi-visit-migration.h"
  44#include "qapi/qapi-visit-sockets.h"
  45#include "qapi/qapi-commands-migration.h"
  46#include "qapi/qapi-events-migration.h"
  47#include "qapi/qmp/qerror.h"
  48#include "qobject/qnull.h"
  49#include "qemu/rcu.h"
  50#include "postcopy-ram.h"
  51#include "qemu/thread.h"
  52#include "trace.h"
  53#include "exec/target_page.h"
  54#include "io/channel-buffer.h"
  55#include "io/channel-tls.h"
  56#include "migration/colo.h"
  57#include "hw/boards.h"
  58#include "monitor/monitor.h"
  59#include "net/announce.h"
  60#include "qemu/queue.h"
  61#include "multifd.h"
  62#include "threadinfo.h"
  63#include "qemu/yank.h"
  64#include "system/cpus.h"
  65#include "yank_functions.h"
  66#include "system/qtest.h"
  67#include "options.h"
  68#include "system/dirtylimit.h"
  69#include "qemu/sockets.h"
  70#include "system/kvm.h"
  71
/*
 * Designated initializer for slot @elem of the notifier-list array @array:
 * expands to "[elem] = <initializer for (array)[elem]>".
 */
#define NOTIFIER_ELEM_INIT(array, elem)    \
    [elem] = NOTIFIER_WITH_RETURN_LIST_INITIALIZER((array)[elem])

/* Initial value of MigrationIncomingState::exit_on_error */
#define INMIGRATE_DEFAULT_EXIT_ON_ERROR true

/* One notifier list per migration mode, indexed by the MIG_MODE_* value */
static NotifierWithReturnList migration_state_notifiers[] = {
    NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL),
    NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT),
    NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_TRANSFER),
};
  82
/*
 * Messages sent on the return path from destination to source.
 *
 * The enumerator value is what migrate_send_rp_message() puts on the wire
 * as the be16 message type, followed by a be16 length and the payload
 * described next to each entry.
 */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32 ) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */
    MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */

    MIG_RP_MSG_MAX
};
  97
/* Migration channel types */
enum { CH_MAIN, CH_MULTIFD, CH_POSTCOPY };

/*
 * When we add fault tolerance, we could have several migrations at once.
 * For now we don't need dynamic creation of migration objects: there is a
 * single outgoing and a single incoming state, both created in
 * migration_object_init().
 */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

/* One GSList per migration mode (array sized by MIG_MODE__MAX) */
static GSList *migration_blockers[MIG_MODE__MAX];

/* Forward declarations of static helpers used before their definition */
static bool migration_object_check(MigrationState *ms, Error **errp);
static bool migration_switchover_start(MigrationState *s, Error **errp);
static bool close_return_path_on_source(MigrationState *s);
static void migration_completion_end(MigrationState *s);
static void migrate_hup_delete(MigrationState *s);
 115
 116static void migration_downtime_start(MigrationState *s)
 117{
 118    trace_vmstate_downtime_checkpoint("src-downtime-start");
 119    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 120}
 121
 122/*
 123 * This is unfortunate: incoming migration actually needs the outgoing
 124 * migration state (MigrationState) to be there too, e.g. to query
 125 * capabilities, parameters, using locks, setup errors, etc.
 126 *
 127 * NOTE: when calling this, making sure current_migration exists and not
 128 * been freed yet!  Otherwise trying to access the refcount is already
 129 * an use-after-free itself..
 130 *
 131 * TODO: Move shared part of incoming / outgoing out into separate object.
 132 * Then this is not needed.
 133 */
 134static void migrate_incoming_ref_outgoing_state(void)
 135{
 136    object_ref(migrate_get_current());
 137}
 138static void migrate_incoming_unref_outgoing_state(void)
 139{
 140    object_unref(migrate_get_current());
 141}
 142
 143static void migration_downtime_end(MigrationState *s)
 144{
 145    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 146
 147    /*
 148     * If downtime already set, should mean that postcopy already set it,
 149     * then that should be the real downtime already.
 150     */
 151    if (!s->downtime) {
 152        s->downtime = now - s->downtime_start;
 153        trace_vmstate_downtime_checkpoint("src-downtime-end");
 154    }
 155}
 156
 157static void precopy_notify_complete(void)
 158{
 159    Error *local_err = NULL;
 160
 161    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
 162        error_report_err(local_err);
 163    }
 164
 165    trace_migration_precopy_complete();
 166}
 167
 168static bool migration_needs_multiple_sockets(void)
 169{
 170    return migrate_multifd() || migrate_postcopy_preempt();
 171}
 172
 173static RunState migration_get_target_runstate(void)
 174{
 175    /*
 176     * When the global state is not migrated, it means we don't know the
 177     * runstate of the src QEMU.  We don't have much choice but assuming
 178     * the VM is running.  NOTE: this is pretty rare case, so far only Xen
 179     * uses it.
 180     */
 181    if (!global_state_received()) {
 182        return RUN_STATE_RUNNING;
 183    }
 184
 185    return global_state_get_runstate();
 186}
 187
 188static bool transport_supports_multi_channels(MigrationAddress *addr)
 189{
 190    if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
 191        SocketAddress *saddr = &addr->u.socket;
 192
 193        return (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
 194                saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
 195                saddr->type == SOCKET_ADDRESS_TYPE_VSOCK);
 196    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
 197        return migrate_mapped_ram();
 198    } else {
 199        return false;
 200    }
 201}
 202
 203static bool migration_needs_seekable_channel(void)
 204{
 205    return migrate_mapped_ram();
 206}
 207
 208static bool migration_needs_extra_fds(void)
 209{
 210    /*
 211     * When doing direct-io, multifd requires two different,
 212     * non-duplicated file descriptors so we can use one of them for
 213     * unaligned IO.
 214     */
 215    return migrate_multifd() && migrate_direct_io();
 216}
 217
 218static bool transport_supports_seeking(MigrationAddress *addr)
 219{
 220    if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
 221        return true;
 222    }
 223
 224    return false;
 225}
 226
 227static bool transport_supports_extra_fds(MigrationAddress *addr)
 228{
 229    /* file: works because QEMU can open it multiple times */
 230    return addr->transport == MIGRATION_ADDRESS_TYPE_FILE;
 231}
 232
 233static bool
 234migration_channels_and_transport_compatible(MigrationAddress *addr,
 235                                            Error **errp)
 236{
 237    if (migration_needs_seekable_channel() &&
 238        !transport_supports_seeking(addr)) {
 239        error_setg(errp, "Migration requires seekable transport (e.g. file)");
 240        return false;
 241    }
 242
 243    if (migration_needs_multiple_sockets() &&
 244        !transport_supports_multi_channels(addr)) {
 245        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
 246        return false;
 247    }
 248
 249    if (migration_needs_extra_fds() &&
 250        !transport_supports_extra_fds(addr)) {
 251        error_setg(errp,
 252                   "Migration requires a transport that allows for extra fds (e.g. file)");
 253        return false;
 254    }
 255
 256    if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
 257        addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
 258        error_setg(errp, "Migration requires streamable transport (eg unix)");
 259        return false;
 260    }
 261
 262    return true;
 263}
 264
 265static bool
 266migration_capabilities_and_transport_compatible(MigrationAddress *addr,
 267                                                Error **errp)
 268{
 269    if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
 270        return migrate_rdma_caps_check(migrate_get_current()->capabilities,
 271                                       errp);
 272    }
 273
 274    return true;
 275}
 276
 277static bool migration_transport_compatible(MigrationAddress *addr, Error **errp)
 278{
 279    return migration_channels_and_transport_compatible(addr, errp) &&
 280           migration_capabilities_and_transport_compatible(addr, errp);
 281}
 282
 283static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
 284{
 285    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;
 286
 287    return (a > b) - (a < b);
 288}
 289
 290static int migration_stop_vm(MigrationState *s, RunState state)
 291{
 292    int ret;
 293
 294    migration_downtime_start(s);
 295
 296    s->vm_old_state = runstate_get();
 297    global_state_store();
 298
 299    ret = vm_stop_force_state(state);
 300
 301    trace_vmstate_downtime_checkpoint("src-vm-stopped");
 302    trace_migration_completion_vm_stop(ret);
 303
 304    return ret;
 305}
 306
 307void migration_object_init(void)
 308{
 309    /* This can only be called once. */
 310    assert(!current_migration);
 311    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
 312
 313    /*
 314     * Init the migrate incoming object as well no matter whether
 315     * we'll use it or not.
 316     */
 317    assert(!current_incoming);
 318    current_incoming = g_new0(MigrationIncomingState, 1);
 319    current_incoming->state = MIGRATION_STATUS_NONE;
 320    current_incoming->postcopy_remote_fds =
 321        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
 322    qemu_mutex_init(&current_incoming->rp_mutex);
 323    qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
 324    qemu_event_init(&current_incoming->main_thread_load_event, false);
 325    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
 326    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
 327    qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
 328    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);
 329
 330    qemu_mutex_init(&current_incoming->page_request_mutex);
 331    qemu_cond_init(&current_incoming->page_request_cond);
 332    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
 333
 334    current_incoming->exit_on_error = INMIGRATE_DEFAULT_EXIT_ON_ERROR;
 335
 336    migration_object_check(current_migration, &error_fatal);
 337
 338    ram_mig_init();
 339    dirty_bitmap_mig_init();
 340
 341    /* Initialize cpu throttle timers */
 342    cpu_throttle_init();
 343}
 344
/*
 * Bookkeeping for one bottom half scheduled via migration_bh_schedule():
 * allocated there and freed by migration_bh_dispatch_bh() after the
 * callback has run.
 */
typedef struct {
    QEMUBH *bh;         /* the bottom half that runs the dispatcher */
    QEMUBHFunc *cb;     /* caller's callback */
    void *opaque;       /* argument passed to @cb */
} MigrationBH;
 350
 351static void migration_bh_dispatch_bh(void *opaque)
 352{
 353    MigrationState *s = migrate_get_current();
 354    MigrationBH *migbh = opaque;
 355
 356    /* cleanup this BH */
 357    qemu_bh_delete(migbh->bh);
 358    migbh->bh = NULL;
 359
 360    /* dispatch the other one */
 361    migbh->cb(migbh->opaque);
 362    object_unref(OBJECT(s));
 363
 364    g_free(migbh);
 365}
 366
 367void migration_bh_schedule(QEMUBHFunc *cb, void *opaque)
 368{
 369    MigrationState *s = migrate_get_current();
 370    MigrationBH *migbh = g_new0(MigrationBH, 1);
 371    QEMUBH *bh = qemu_bh_new(migration_bh_dispatch_bh, migbh);
 372
 373    /* Store these to dispatch when the BH runs */
 374    migbh->bh = bh;
 375    migbh->cb = cb;
 376    migbh->opaque = opaque;
 377
 378    /*
 379     * Ref the state for bh, because it may be called when
 380     * there're already no other refs
 381     */
 382    object_ref(OBJECT(s));
 383    qemu_bh_schedule(bh);
 384}
 385
 386void migration_shutdown(void)
 387{
 388    /*
 389     * When the QEMU main thread exit, the COLO thread
 390     * may wait a semaphore. So, we should wakeup the
 391     * COLO thread before migration shutdown.
 392     */
 393    colo_shutdown();
 394    /*
 395     * Cancel the current migration - that will (eventually)
 396     * stop the migration using this structure
 397     */
 398    migration_cancel();
 399    object_unref(OBJECT(current_migration));
 400
 401    /*
 402     * Cancel outgoing migration of dirty bitmaps. It should
 403     * at least unref used block nodes.
 404     */
 405    dirty_bitmap_mig_cancel_outgoing();
 406
 407    /*
 408     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
 409     * are non-critical data, and their loss never considered as
 410     * something serious.
 411     */
 412    dirty_bitmap_mig_cancel_incoming();
 413}
 414
 415/* For outgoing */
 416MigrationState *migrate_get_current(void)
 417{
 418    /* This can only be called after the object created. */
 419    assert(current_migration);
 420    return current_migration;
 421}
 422
 423MigrationIncomingState *migration_incoming_get_current(void)
 424{
 425    assert(current_incoming);
 426    return current_incoming;
 427}
 428
 429void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
 430{
 431    if (mis->socket_address_list) {
 432        qapi_free_SocketAddressList(mis->socket_address_list);
 433        mis->socket_address_list = NULL;
 434    }
 435
 436    if (mis->transport_cleanup) {
 437        mis->transport_cleanup(mis->transport_data);
 438        mis->transport_data = mis->transport_cleanup = NULL;
 439    }
 440}
 441
/*
 * Tear down everything attached to the incoming migration state: multifd
 * receive side, loadvm state, return path and main channel files, the
 * postcopy fd array, the transport, the page request tree and the extra
 * postcopy channel.  Finally the CPR incoming mode is reset and the
 * migration yank instance is unregistered.
 *
 * Must be called with the BQL held (asserted below).
 */
void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    multifd_recv_cleanup();

    /*
     * RAM state cleanup needs to happen after multifd cleanup, because
     * multifd threads can use some of its states (receivedmap).
     *
     * The VFIO load_cleanup() implementation is BQL-sensitive. It requires
     * BQL must NOT be taken when recycling load threads, so that it won't
     * block the load threads from making progress on address space
     * modification operations.
     *
     * To make it work, we could try to not take BQL for all load_cleanup(),
     * or conditionally unlock BQL only if bql_locked() in VFIO.
     *
     * Since most existing call sites take BQL for load_cleanup(), make
     * it simple by taking BQL always as the rule, so that VFIO can unlock
     * BQL and retake unconditionally.
     */
    assert(bql_locked());
    qemu_loadvm_state_cleanup(mis);

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    /* Main channel: drop its yank registration before closing it */
    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    /* Extra postcopy channel: same treatment as the main channel */
    if (mis->postcopy_qemufile_dst) {
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
    }

    cpr_set_incoming_mode(MIG_MODE_NONE);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
 500
 501static void migrate_generate_event(MigrationStatus new_state)
 502{
 503    if (migrate_events()) {
 504        qapi_event_send_migration(new_state);
 505    }
 506}
 507
 508/*
 509 * Send a message on the return channel back to the source
 510 * of the migration.
 511 */
 512static int migrate_send_rp_message(MigrationIncomingState *mis,
 513                                   enum mig_rp_message_type message_type,
 514                                   uint16_t len, void *data)
 515{
 516    int ret = 0;
 517
 518    trace_migrate_send_rp_message((int)message_type, len);
 519    QEMU_LOCK_GUARD(&mis->rp_mutex);
 520
 521    /*
 522     * It's possible that the file handle got lost due to network
 523     * failures.
 524     */
 525    if (!mis->to_src_file) {
 526        ret = -EIO;
 527        return ret;
 528    }
 529
 530    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
 531    qemu_put_be16(mis->to_src_file, len);
 532    qemu_put_buffer(mis->to_src_file, data, len);
 533    return qemu_fflush(mis->to_src_file);
 534}
 535
 536/* Request one page from the source VM at the given start address.
 537 *   rb: the RAMBlock to request the page in
 538 *   Start: Address offset within the RB
 539 *   Len: Length in bytes required - must be a multiple of pagesize
 540 */
 541int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
 542                                      RAMBlock *rb, ram_addr_t start)
 543{
 544    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
 545    size_t msglen = 12; /* start + len */
 546    size_t len = qemu_ram_pagesize(rb);
 547    enum mig_rp_message_type msg_type;
 548    const char *rbname;
 549    int rbname_len;
 550
 551    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
 552    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
 553
 554    /*
 555     * We maintain the last ramblock that we requested for page.  Note that we
 556     * don't need locking because this function will only be called within the
 557     * postcopy ram fault thread.
 558     */
 559    if (rb != mis->last_rb) {
 560        mis->last_rb = rb;
 561
 562        rbname = qemu_ram_get_idstr(rb);
 563        rbname_len = strlen(rbname);
 564
 565        assert(rbname_len < 256);
 566
 567        bufc[msglen++] = rbname_len;
 568        memcpy(bufc + msglen, rbname, rbname_len);
 569        msglen += rbname_len;
 570        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
 571    } else {
 572        msg_type = MIG_RP_MSG_REQ_PAGES;
 573    }
 574
 575    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
 576}
 577
 578int migrate_send_rp_req_pages(MigrationIncomingState *mis,
 579                              RAMBlock *rb, ram_addr_t start, uint64_t haddr,
 580                              uint32_t tid)
 581{
 582    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
 583    bool received = false;
 584
 585    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
 586        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
 587        if (!received) {
 588            if (!g_tree_lookup(mis->page_requested, aligned)) {
 589                /*
 590                 * The page has not been received, and it's not yet in the
 591                 * page request list.  Queue it.  Set the value of element
 592                 * to 1, so that things like g_tree_lookup() will return
 593                 * TRUE (1) when found.
 594                 */
 595                g_tree_insert(mis->page_requested, aligned, (gpointer)1);
 596                qatomic_inc(&mis->page_requested_count);
 597                trace_postcopy_page_req_add(aligned, mis->page_requested_count);
 598            }
 599            mark_postcopy_blocktime_begin(haddr, tid, rb);
 600        }
 601    }
 602
 603    /*
 604     * If the page is there, skip sending the message.  We don't even need the
 605     * lock because as long as the page arrived, it'll be there forever.
 606     */
 607    if (received) {
 608        return 0;
 609    }
 610
 611    return migrate_send_rp_message_req_pages(mis, rb, start);
 612}
 613
/*
 * Whether COLO is currently enabled on the incoming side.  Set by
 * migration_incoming_enable_colo(), cleared by
 * migration_incoming_disable_colo().
 */
static bool migration_colo_enabled;

/* Return the current value of the incoming COLO flag */
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}
 619
/*
 * Turn COLO off on the incoming side: undo the
 * ram_block_discard_disable(true) done when enabling it and clear the
 * flag.
 */
void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}
 625
 626int migration_incoming_enable_colo(void)
 627{
 628#ifndef CONFIG_REPLICATION
 629    error_report("ENABLE_COLO command come in migration stream, but the "
 630                 "replication module is not built in");
 631    return -ENOTSUP;
 632#endif
 633
 634    if (!migrate_colo()) {
 635        error_report("ENABLE_COLO command come in migration stream, but x-colo "
 636                     "capability is not set");
 637        return -EINVAL;
 638    }
 639
 640    if (ram_block_discard_disable(true)) {
 641        error_report("COLO: cannot disable RAM discard");
 642        return -EBUSY;
 643    }
 644    migration_colo_enabled = true;
 645    return 0;
 646}
 647
 648void migrate_add_address(SocketAddress *address)
 649{
 650    MigrationIncomingState *mis = migration_incoming_get_current();
 651
 652    QAPI_LIST_PREPEND(mis->socket_address_list,
 653                      QAPI_CLONE(SocketAddress, address));
 654}
 655
 656bool migrate_is_uri(const char *uri)
 657{
 658    while (*uri && *uri != ':') {
 659        if (!qemu_isalpha(*uri++)) {
 660            return false;
 661        }
 662    }
 663    return *uri == ':';
 664}
 665
 666bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
 667                       Error **errp)
 668{
 669    g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);
 670    g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
 671    InetSocketAddress *isock = &addr->u.rdma;
 672    strList **tail = &addr->u.exec.args;
 673
 674    if (strstart(uri, "exec:", NULL)) {
 675        addr->transport = MIGRATION_ADDRESS_TYPE_EXEC;
 676#ifdef WIN32
 677        QAPI_LIST_APPEND(tail, g_strdup(exec_get_cmd_path()));
 678        QAPI_LIST_APPEND(tail, g_strdup("/c"));
 679#else
 680        QAPI_LIST_APPEND(tail, g_strdup("/bin/sh"));
 681        QAPI_LIST_APPEND(tail, g_strdup("-c"));
 682#endif
 683        QAPI_LIST_APPEND(tail, g_strdup(uri + strlen("exec:")));
 684    } else if (strstart(uri, "rdma:", NULL)) {
 685        if (inet_parse(isock, uri + strlen("rdma:"), errp)) {
 686            qapi_free_InetSocketAddress(isock);
 687            return false;
 688        }
 689        addr->transport = MIGRATION_ADDRESS_TYPE_RDMA;
 690    } else if (strstart(uri, "tcp:", NULL) ||
 691                strstart(uri, "unix:", NULL) ||
 692                strstart(uri, "vsock:", NULL) ||
 693                strstart(uri, "fd:", NULL)) {
 694        addr->transport = MIGRATION_ADDRESS_TYPE_SOCKET;
 695        SocketAddress *saddr = socket_parse(uri, errp);
 696        if (!saddr) {
 697            return false;
 698        }
 699        addr->u.socket.type = saddr->type;
 700        addr->u.socket.u = saddr->u;
 701        /* Don't free the objects inside; their ownership moved to "addr" */
 702        g_free(saddr);
 703    } else if (strstart(uri, "file:", NULL)) {
 704        addr->transport = MIGRATION_ADDRESS_TYPE_FILE;
 705        addr->u.file.filename = g_strdup(uri + strlen("file:"));
 706        if (file_parse_offset(addr->u.file.filename, &addr->u.file.offset,
 707                              errp)) {
 708            return false;
 709        }
 710    } else {
 711        error_setg(errp, "unknown migration protocol: %s", uri);
 712        return false;
 713    }
 714
 715    val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN;
 716    val->addr = g_steal_pointer(&addr);
 717    *channel = g_steal_pointer(&val);
 718    return true;
 719}
 720
 721static bool
 722migration_incoming_state_setup(MigrationIncomingState *mis, Error **errp)
 723{
 724    MigrationStatus current = mis->state;
 725
 726    if (current == MIGRATION_STATUS_POSTCOPY_PAUSED) {
 727        /*
 728         * Incoming postcopy migration will stay in PAUSED state even if
 729         * reconnection happened.
 730         */
 731        return true;
 732    }
 733
 734    if (current != MIGRATION_STATUS_NONE) {
 735        error_setg(errp, "Illegal migration incoming state: %s",
 736                   MigrationStatus_str(current));
 737        return false;
 738    }
 739
 740    migrate_set_state(&mis->state, current, MIGRATION_STATUS_SETUP);
 741    return true;
 742}
 743
 744static void qemu_start_incoming_migration(const char *uri, bool has_channels,
 745                                          MigrationChannelList *channels,
 746                                          Error **errp)
 747{
 748    g_autoptr(MigrationChannel) channel = NULL;
 749    MigrationAddress *addr = NULL;
 750    MigrationIncomingState *mis = migration_incoming_get_current();
 751
 752    /*
 753     * Having preliminary checks for uri and channel
 754     */
 755    if (!uri == !channels) {
 756        error_setg(errp, "need either 'uri' or 'channels' argument");
 757        return;
 758    }
 759
 760    if (channels) {
 761        /* To verify that Migrate channel list has only item */
 762        if (channels->next) {
 763            error_setg(errp, "Channel list must have only one entry, "
 764                             "for type 'main'");
 765            return;
 766        }
 767        addr = channels->value->addr;
 768    }
 769
 770    if (uri) {
 771        /* caller uses the old URI syntax */
 772        if (!migrate_uri_parse(uri, &channel, errp)) {
 773            return;
 774        }
 775        addr = channel->addr;
 776    }
 777
 778    /* transport mechanism not suitable for migration? */
 779    if (!migration_transport_compatible(addr, errp)) {
 780        return;
 781    }
 782
 783    if (!migration_incoming_state_setup(mis, errp)) {
 784        return;
 785    }
 786
 787    if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
 788        SocketAddress *saddr = &addr->u.socket;
 789        if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
 790            saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
 791            saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
 792            socket_start_incoming_migration(saddr, errp);
 793        } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
 794            fd_start_incoming_migration(saddr->u.fd.str, errp);
 795        }
 796#ifdef CONFIG_RDMA
 797    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
 798        rdma_start_incoming_migration(&addr->u.rdma, errp);
 799#endif
 800    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
 801        exec_start_incoming_migration(addr->u.exec.args, errp);
 802    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
 803        file_start_incoming_migration(&addr->u.file, errp);
 804    } else {
 805        error_setg(errp, "unknown migration protocol: %s", uri);
 806    }
 807
 808    /* Close cpr socket to tell source that we are listening */
 809    cpr_state_close();
 810}
 811
/*
 * Bottom half that finishes an incoming migration once the VM state has
 * been loaded; scheduled by process_incoming_migration_co().
 *
 * @opaque is the MigrationIncomingState.  Announces the guest, shuts the
 * multifd receive side down, puts the VM into its target runstate, moves
 * the incoming state from ACTIVE to COMPLETED and destroys it.
 */
static void process_incoming_migration_bh(void *opaque)
{
    MigrationIncomingState *mis = opaque;

    trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced");

    multifd_recv_shutdown();

    dirty_bitmap_mig_before_vm_start();

    if (runstate_is_live(migration_get_target_runstate())) {
        if (autostart) {
            /*
             * Block activation is always delayed until VM starts, either
             * here (which means we need to start the dest VM right now..),
             * or until qmp_cont() later.
             *
             * We used to have cap 'late-block-activate' but now we do this
             * unconditionally, as it has no harm but only benefit.  E.g.,
             * it's not part of migration ABI on the time of disk activation.
             *
             * Make sure all file formats throw away their mutable
             * metadata.  If error, don't restart the VM yet.
             */
            if (migration_block_activate(NULL)) {
                vm_start();
            }
        } else {
            /* Live target runstate but no autostart: leave the VM paused */
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        /* Non-live target runstate: adopt the migrated runstate as is */
        runstate_set(global_state_get_runstate());
    }
    trace_vmstate_downtime_checkpoint("dst-precopy-bh-vm-started");
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    migration_incoming_state_destroy();
}
 866
/*
 * Coroutine that loads the incoming migration stream from the main
 * channel (mis->from_src_file, which must already be set).
 *
 * @opaque is not used.
 *
 * On success the final steps are deferred to
 * process_incoming_migration_bh().  If postcopy was started and the load
 * did not fail, nothing more is done here.  On failure the state becomes
 * FAILED, the error is recorded in the migration state, the incoming
 * state is destroyed and, if mis->exit_on_error is set, QEMU exits.
 *
 * In all cases that return, the reference on the outgoing state is
 * dropped at the end.
 */
static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationState *s = migrate_get_current();
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);

    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_ACTIVE);

    /* loadvm_co is only set while qemu_loadvm_state() is running */
    mis->loadvm_co = qemu_coroutine_self();
    ret = qemu_loadvm_state(mis->from_src_file);
    mis->loadvm_co = NULL;

    trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed");

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            goto out;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    if (ret < 0) {
        error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
        goto fail;
    }

    if (migration_incoming_colo_enabled()) {
        /* yield until COLO exit */
        colo_incoming_co();
    }

    migration_bh_schedule(process_incoming_migration_bh, mis);
    goto out;

fail:
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    /* local_err is still ours to free after migrate_set_error() */
    migrate_set_error(s, local_err);
    error_free(local_err);

    migration_incoming_state_destroy();

    if (mis->exit_on_error) {
        /* Report the recorded error, clearing it under its lock, and exit */
        WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
            error_report_err(s->error);
            s->error = NULL;
        }

        exit(EXIT_FAILURE);
    }
out:
    /* Pairs with the refcount taken in qmp_migrate_incoming() */
    migrate_incoming_unref_outgoing_state();
}
 943
 944/**
 945 * migration_incoming_setup: Setup incoming migration
 946 * @f: file for main migration channel
 947 */
 948static void migration_incoming_setup(QEMUFile *f)
 949{
 950    MigrationIncomingState *mis = migration_incoming_get_current();
 951
 952    assert(!mis->from_src_file);
 953    mis->from_src_file = f;
 954    qemu_file_set_blocking(f, false);
 955}
 956
 957void migration_incoming_process(void)
 958{
 959    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
 960    qemu_coroutine_enter(co);
 961}
 962
 963/* Returns true if recovered from a paused migration, otherwise false */
 964static bool postcopy_try_recover(void)
 965{
 966    MigrationIncomingState *mis = migration_incoming_get_current();
 967
 968    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
 969        /* Resumed from a paused postcopy migration */
 970
 971        /* This should be set already in migration_incoming_setup() */
 972        assert(mis->from_src_file);
 973        /* Postcopy has standalone thread to do vm load */
 974        qemu_file_set_blocking(mis->from_src_file, true);
 975
 976        /* Re-configure the return path */
 977        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
 978
 979        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
 980                          MIGRATION_STATUS_POSTCOPY_RECOVER);
 981
 982        /*
 983         * Here, we only wake up the main loading thread (while the
 984         * rest threads will still be waiting), so that we can receive
 985         * commands from source now, and answer it if needed. The
 986         * rest threads will be woken up afterwards until we are sure
 987         * that source is ready to reply to page requests.
 988         */
 989        qemu_sem_post(&mis->postcopy_pause_sem_dst);
 990        return true;
 991    }
 992
 993    return false;
 994}
 995
 996void migration_fd_process_incoming(QEMUFile *f)
 997{
 998    migration_incoming_setup(f);
 999    if (postcopy_try_recover()) {
1000        return;
1001    }
1002    migration_incoming_process();
1003}
1004
1005static bool migration_has_main_and_multifd_channels(void)
1006{
1007    MigrationIncomingState *mis = migration_incoming_get_current();
1008    if (!mis->from_src_file) {
1009        /* main channel not established */
1010        return false;
1011    }
1012
1013    if (migrate_multifd() && !multifd_recv_all_channels_created()) {
1014        return false;
1015    }
1016
1017    /* main and all multifd channels are established */
1018    return true;
1019}
1020
/*
 * Handle a newly connected incoming channel @ioc.
 *
 * The channel is classified as the main channel, a multifd channel or the
 * postcopy preempt channel and handed to the matching setup code.  Once
 * the main channel and all multifd channels are present, either a paused
 * postcopy migration is resumed or the incoming migration is started.
 *
 * Errors are reported through @errp.
 */
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    QEMUFile *f;
    uint8_t channel;
    uint32_t channel_magic = 0;
    int ret = 0;

    if (!migration_has_main_and_multifd_channels()) {
        if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
            /*
             * With multiple channels, it is possible that we receive channels
             * out of order on destination side, causing incorrect mapping of
             * source channels on destination side. Check channel MAGIC to
             * decide type of channel. Please note this is best effort,
             * postcopy preempt channel does not send any magic number so
             * avoid it for postcopy live migration. Also tls live migration
             * already does tls handshake while initializing main channel so
             * with tls this issue is not possible.
             */
            ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
                                              sizeof(channel_magic), errp);
            if (ret != 0) {
                return;
            }

            /* The magic is compared in host byte order */
            channel_magic = be32_to_cpu(channel_magic);
            if (channel_magic == QEMU_VM_FILE_MAGIC) {
                channel = CH_MAIN;
            } else if (channel_magic == MULTIFD_MAGIC) {
                assert(migrate_multifd());
                channel = CH_MULTIFD;
            } else if (!mis->from_src_file &&
                        mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
                /* reconnect main channel for postcopy recovery */
                channel = CH_MAIN;
            } else {
                error_setg(errp, "unknown channel magic: %u", channel_magic);
                return;
            }
        } else if (mis->from_src_file && migrate_multifd()) {
            /*
             * Non-peekable channels like tls/file are processed as
             * multifd channels when multifd is enabled.
             */
            channel = CH_MULTIFD;
        } else if (!mis->from_src_file) {
            /* Non-peekable and no main channel yet: this must be it */
            channel = CH_MAIN;
        } else {
            error_setg(errp, "non-peekable channel used without multifd");
            return;
        }
    } else {
        /*
         * Main and multifd channels are all present already, so the only
         * channel that can still arrive is the postcopy preempt one.
         */
        assert(migrate_postcopy_preempt());
        channel = CH_POSTCOPY;
    }

    if (multifd_recv_setup(errp) != 0) {
        return;
    }

    if (channel == CH_MAIN) {
        f = qemu_file_new_input(ioc);
        migration_incoming_setup(f);
    } else if (channel == CH_MULTIFD) {
        /* Multiple connections */
        multifd_recv_new_channel(ioc, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    } else if (channel == CH_POSTCOPY) {
        assert(!mis->postcopy_qemufile_dst);
        f = qemu_file_new_input(ioc);
        postcopy_preempt_new_channel(mis, f);
        /* The preempt channel never starts the migration by itself */
        return;
    }

    if (migration_has_main_and_multifd_channels()) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}
1108
1109/**
1110 * @migration_has_all_channels: We have received all channels that we need
1111 *
1112 * Returns true when we have got connections to all the channels that
1113 * we need for migration.
1114 */
1115bool migration_has_all_channels(void)
1116{
1117    if (!migration_has_main_and_multifd_channels()) {
1118        return false;
1119    }
1120
1121    MigrationIncomingState *mis = migration_incoming_get_current();
1122    if (migrate_postcopy_preempt() && !mis->postcopy_qemufile_dst) {
1123        return false;
1124    }
1125
1126    return true;
1127}
1128
1129int migrate_send_rp_switchover_ack(MigrationIncomingState *mis)
1130{
1131    return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL);
1132}
1133
1134/*
1135 * Send a 'SHUT' message on the return channel with the given value
1136 * to indicate that we've finished with the RP.  Non-0 value indicates
1137 * error.
1138 */
1139void migrate_send_rp_shut(MigrationIncomingState *mis,
1140                          uint32_t value)
1141{
1142    uint32_t buf;
1143
1144    buf = cpu_to_be32(value);
1145    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
1146}
1147
1148/*
1149 * Send a 'PONG' message on the return channel with the given value
1150 * (normally in response to a 'PING')
1151 */
1152void migrate_send_rp_pong(MigrationIncomingState *mis,
1153                          uint32_t value)
1154{
1155    uint32_t buf;
1156
1157    buf = cpu_to_be32(value);
1158    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
1159}
1160
1161void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
1162                                 char *block_name)
1163{
1164    char buf[512];
1165    int len;
1166    int64_t res;
1167
1168    /*
1169     * First, we send the header part. It contains only the len of
1170     * idstr, and the idstr itself.
1171     */
1172    len = strlen(block_name);
1173    buf[0] = len;
1174    memcpy(buf + 1, block_name, len);
1175
1176    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
1177        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
1178                     __func__);
1179        return;
1180    }
1181
1182    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);
1183
1184    /*
1185     * Next, we dump the received bitmap to the stream.
1186     *
1187     * TODO: currently we are safe since we are the only one that is
1188     * using the to_src_file handle (fault thread is still paused),
1189     * and it's ok even not taking the mutex. However the best way is
1190     * to take the lock before sending the message header, and release
1191     * the lock after sending the bitmap.
1192     */
1193    qemu_mutex_lock(&mis->rp_mutex);
1194    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
1195    qemu_mutex_unlock(&mis->rp_mutex);
1196
1197    trace_migrate_send_rp_recv_bitmap(block_name, res);
1198}
1199
1200void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
1201{
1202    uint32_t buf;
1203
1204    buf = cpu_to_be32(value);
1205    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
1206}
1207
1208bool migration_is_running(void)
1209{
1210    MigrationState *s = current_migration;
1211
1212    if (!s) {
1213        return false;
1214    }
1215
1216    switch (s->state) {
1217    case MIGRATION_STATUS_ACTIVE:
1218    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1219    case MIGRATION_STATUS_POSTCOPY_PAUSED:
1220    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
1221    case MIGRATION_STATUS_POSTCOPY_RECOVER:
1222    case MIGRATION_STATUS_SETUP:
1223    case MIGRATION_STATUS_PRE_SWITCHOVER:
1224    case MIGRATION_STATUS_DEVICE:
1225    case MIGRATION_STATUS_WAIT_UNPLUG:
1226    case MIGRATION_STATUS_CANCELLING:
1227    case MIGRATION_STATUS_COLO:
1228        return true;
1229    default:
1230        return false;
1231    }
1232}
1233
1234static bool migration_is_active(void)
1235{
1236    MigrationState *s = current_migration;
1237
1238    return (s->state == MIGRATION_STATUS_ACTIVE ||
1239            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
1240}
1241
1242static bool migrate_show_downtime(MigrationState *s)
1243{
1244    return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
1245}
1246
1247static void populate_time_info(MigrationInfo *info, MigrationState *s)
1248{
1249    info->has_status = true;
1250    info->has_setup_time = true;
1251    info->setup_time = s->setup_time;
1252
1253    if (s->state == MIGRATION_STATUS_COMPLETED) {
1254        info->has_total_time = true;
1255        info->total_time = s->total_time;
1256    } else {
1257        info->has_total_time = true;
1258        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
1259                           s->start_time;
1260    }
1261
1262    if (migrate_show_downtime(s)) {
1263        info->has_downtime = true;
1264        info->downtime = s->downtime;
1265    } else {
1266        info->has_expected_downtime = true;
1267        info->expected_downtime = s->expected_downtime;
1268    }
1269}
1270
1271static void populate_ram_info(MigrationInfo *info, MigrationState *s)
1272{
1273    size_t page_size = qemu_target_page_size();
1274
1275    info->ram = g_malloc0(sizeof(*info->ram));
1276    info->ram->transferred = migration_transferred_bytes();
1277    info->ram->total = ram_bytes_total();
1278    info->ram->duplicate = stat64_get(&mig_stats.zero_pages);
1279    info->ram->normal = stat64_get(&mig_stats.normal_pages);
1280    info->ram->normal_bytes = info->ram->normal * page_size;
1281    info->ram->mbps = s->mbps;
1282    info->ram->dirty_sync_count =
1283        stat64_get(&mig_stats.dirty_sync_count);
1284    info->ram->dirty_sync_missed_zero_copy =
1285        stat64_get(&mig_stats.dirty_sync_missed_zero_copy);
1286    info->ram->postcopy_requests =
1287        stat64_get(&mig_stats.postcopy_requests);
1288    info->ram->page_size = page_size;
1289    info->ram->multifd_bytes = stat64_get(&mig_stats.multifd_bytes);
1290    info->ram->pages_per_second = s->pages_per_second;
1291    info->ram->precopy_bytes = stat64_get(&mig_stats.precopy_bytes);
1292    info->ram->downtime_bytes = stat64_get(&mig_stats.downtime_bytes);
1293    info->ram->postcopy_bytes = stat64_get(&mig_stats.postcopy_bytes);
1294
1295    if (migrate_xbzrle()) {
1296        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
1297        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
1298        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
1299        info->xbzrle_cache->pages = xbzrle_counters.pages;
1300        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
1301        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
1302        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
1303        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
1304    }
1305
1306    if (cpu_throttle_active()) {
1307        info->has_cpu_throttle_percentage = true;
1308        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
1309    }
1310
1311    if (s->state != MIGRATION_STATUS_COMPLETED) {
1312        info->ram->remaining = ram_bytes_remaining();
1313        info->ram->dirty_pages_rate =
1314           stat64_get(&mig_stats.dirty_pages_rate);
1315    }
1316
1317    if (migrate_dirty_limit() && dirtylimit_in_service()) {
1318        info->has_dirty_limit_throttle_time_per_round = true;
1319        info->dirty_limit_throttle_time_per_round =
1320                            dirtylimit_throttle_time_per_round();
1321
1322        info->has_dirty_limit_ring_full_time = true;
1323        info->dirty_limit_ring_full_time = dirtylimit_ring_full_time();
1324    }
1325}
1326
1327static void fill_source_migration_info(MigrationInfo *info)
1328{
1329    MigrationState *s = migrate_get_current();
1330    int state = qatomic_read(&s->state);
1331    GSList *cur_blocker = migration_blockers[migrate_mode()];
1332
1333    info->blocked_reasons = NULL;
1334
1335    /*
1336     * There are two types of reasons a migration might be blocked;
1337     * a) devices marked in VMState as non-migratable, and
1338     * b) Explicit migration blockers
1339     * We need to add both of them here.
1340     */
1341    qemu_savevm_non_migratable_list(&info->blocked_reasons);
1342
1343    while (cur_blocker) {
1344        QAPI_LIST_PREPEND(info->blocked_reasons,
1345                          g_strdup(error_get_pretty(cur_blocker->data)));
1346        cur_blocker = g_slist_next(cur_blocker);
1347    }
1348    info->has_blocked_reasons = info->blocked_reasons != NULL;
1349
1350    switch (state) {
1351    case MIGRATION_STATUS_NONE:
1352        /* no migration has happened ever */
1353        /* do not overwrite destination migration status */
1354        return;
1355    case MIGRATION_STATUS_SETUP:
1356        info->has_status = true;
1357        info->has_total_time = false;
1358        break;
1359    case MIGRATION_STATUS_ACTIVE:
1360    case MIGRATION_STATUS_CANCELLING:
1361    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1362    case MIGRATION_STATUS_PRE_SWITCHOVER:
1363    case MIGRATION_STATUS_DEVICE:
1364    case MIGRATION_STATUS_POSTCOPY_PAUSED:
1365    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
1366    case MIGRATION_STATUS_POSTCOPY_RECOVER:
1367        /* TODO add some postcopy stats */
1368        populate_time_info(info, s);
1369        populate_ram_info(info, s);
1370        migration_populate_vfio_info(info);
1371        break;
1372    case MIGRATION_STATUS_COLO:
1373        info->has_status = true;
1374        /* TODO: display COLO specific information (checkpoint info etc.) */
1375        break;
1376    case MIGRATION_STATUS_COMPLETED:
1377        populate_time_info(info, s);
1378        populate_ram_info(info, s);
1379        migration_populate_vfio_info(info);
1380        break;
1381    case MIGRATION_STATUS_FAILED:
1382        info->has_status = true;
1383        break;
1384    case MIGRATION_STATUS_CANCELLED:
1385        info->has_status = true;
1386        break;
1387    case MIGRATION_STATUS_WAIT_UNPLUG:
1388        info->has_status = true;
1389        break;
1390    }
1391    info->status = state;
1392
1393    QEMU_LOCK_GUARD(&s->error_mutex);
1394    if (s->error) {
1395        info->error_desc = g_strdup(error_get_pretty(s->error));
1396    }
1397}
1398
1399static void fill_destination_migration_info(MigrationInfo *info)
1400{
1401    MigrationIncomingState *mis = migration_incoming_get_current();
1402
1403    if (mis->socket_address_list) {
1404        info->has_socket_address = true;
1405        info->socket_address =
1406            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
1407    }
1408
1409    switch (mis->state) {
1410    case MIGRATION_STATUS_SETUP:
1411    case MIGRATION_STATUS_CANCELLING:
1412    case MIGRATION_STATUS_CANCELLED:
1413    case MIGRATION_STATUS_ACTIVE:
1414    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1415    case MIGRATION_STATUS_POSTCOPY_PAUSED:
1416    case MIGRATION_STATUS_POSTCOPY_RECOVER:
1417    case MIGRATION_STATUS_FAILED:
1418    case MIGRATION_STATUS_COLO:
1419        info->has_status = true;
1420        break;
1421    case MIGRATION_STATUS_COMPLETED:
1422        info->has_status = true;
1423        fill_destination_postcopy_migration_info(info);
1424        break;
1425    default:
1426        return;
1427    }
1428    info->status = mis->state;
1429
1430    if (!info->error_desc) {
1431        MigrationState *s = migrate_get_current();
1432        QEMU_LOCK_GUARD(&s->error_mutex);
1433
1434        if (s->error) {
1435            info->error_desc = g_strdup(error_get_pretty(s->error));
1436        }
1437    }
1438}
1439
1440MigrationInfo *qmp_query_migrate(Error **errp)
1441{
1442    MigrationInfo *info = g_malloc0(sizeof(*info));
1443
1444    fill_destination_migration_info(info);
1445    fill_source_migration_info(info);
1446
1447    return info;
1448}
1449
1450void qmp_migrate_start_postcopy(Error **errp)
1451{
1452    MigrationState *s = migrate_get_current();
1453
1454    if (!migrate_postcopy()) {
1455        error_setg(errp, "Enable postcopy with migrate_set_capability before"
1456                         " the start of migration");
1457        return;
1458    }
1459
1460    if (s->state == MIGRATION_STATUS_NONE) {
1461        error_setg(errp, "Postcopy must be started after migration has been"
1462                         " started");
1463        return;
1464    }
1465    /*
1466     * we don't error if migration has finished since that would be racy
1467     * with issuing this command.
1468     */
1469    qatomic_set(&s->start_postcopy, true);
1470}
1471
1472/* shared migration helpers */
1473
1474void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
1475                       MigrationStatus new_state)
1476{
1477    assert(new_state < MIGRATION_STATUS__MAX);
1478    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
1479        trace_migrate_set_state(MigrationStatus_str(new_state));
1480        migrate_generate_event(new_state);
1481    }
1482}
1483
1484static void migration_cleanup_json_writer(MigrationState *s)
1485{
1486    g_clear_pointer(&s->vmdesc, json_writer_free);
1487}
1488
/*
 * Tear down the outgoing migration @s once it has stopped being active.
 *
 * Releases the per-migration resources (vmdesc writer, hostname, savevm
 * and CPR state, return path), joins the migration thread if it is
 * running, closes the outgoing channel, finishes a pending cancellation,
 * reports the stored error and finally notifies the registered listeners
 * about the outcome.
 */
static void migration_cleanup(MigrationState *s)
{
    MigrationEventType type;
    QEMUFile *tmp = NULL;

    trace_migration_cleanup();

    migration_cleanup_json_writer(s);

    g_free(s->hostname);
    s->hostname = NULL;

    qemu_savevm_state_cleanup();
    cpr_state_close();
    migrate_hup_delete(s);

    close_return_path_on_source(s);

    if (s->migration_thread_running) {
        /*
         * The BQL is dropped around the join; presumably the migration
         * thread may still need it to finish - TODO confirm.
         */
        bql_unlock();
        qemu_thread_join(&s->thread);
        s->migration_thread_running = false;
        bql_lock();
    }

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        /*
         * Close the file handle without the lock to make sure the critical
         * section won't block for long.
         */
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
    }

    if (tmp) {
        /*
         * We only need to shutdown multifd if tmp!=NULL, because if
         * tmp==NULL, it means the main channel isn't established, while
         * multifd is only setup after that (in migration_thread()).
         */
        multifd_send_shutdown();
        migration_ioc_unregister_yank_from_file(tmp);
        qemu_fclose(tmp);
    }

    /* Cleanup must never run while the migration is still active */
    assert(!migration_is_active());

    /* A cancellation that was in flight is complete at this point */
    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used on info migrate.  We can't free it */
        error_report_err(error_copy(s->error));
    }
    /* Tell the listeners whether the migration failed or finished */
    type = migration_has_failed(s) ? MIG_EVENT_PRECOPY_FAILED :
                                     MIG_EVENT_PRECOPY_DONE;
    migration_call_notifiers(s, type, NULL);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
1550
1551static void migration_cleanup_bh(void *opaque)
1552{
1553    migration_cleanup(opaque);
1554}
1555
1556void migrate_set_error(MigrationState *s, const Error *error)
1557{
1558    QEMU_LOCK_GUARD(&s->error_mutex);
1559
1560    trace_migrate_error(error_get_pretty(error));
1561
1562    if (!s->error) {
1563        s->error = error_copy(error);
1564    }
1565}
1566
1567bool migrate_has_error(MigrationState *s)
1568{
1569    /* The lock is not helpful here, but still follow the rule */
1570    QEMU_LOCK_GUARD(&s->error_mutex);
1571    return qatomic_read(&s->error);
1572}
1573
1574static void migrate_error_free(MigrationState *s)
1575{
1576    QEMU_LOCK_GUARD(&s->error_mutex);
1577    if (s->error) {
1578        error_free(s->error);
1579        s->error = NULL;
1580    }
1581}
1582
1583static void migration_connect_set_error(MigrationState *s, const Error *error)
1584{
1585    MigrationStatus current = s->state;
1586    MigrationStatus next;
1587
1588    assert(s->to_dst_file == NULL);
1589
1590    switch (current) {
1591    case MIGRATION_STATUS_SETUP:
1592        next = MIGRATION_STATUS_FAILED;
1593        break;
1594    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
1595        /* Never fail a postcopy migration; switch back to PAUSED instead */
1596        next = MIGRATION_STATUS_POSTCOPY_PAUSED;
1597        break;
1598    default:
1599        /*
1600         * This really shouldn't happen. Just be careful to not crash a VM
1601         * just for this.  Instead, dump something.
1602         */
1603        error_report("%s: Illegal migration status (%s) detected",
1604                     __func__, MigrationStatus_str(current));
1605        return;
1606    }
1607
1608    migrate_set_state(&s->state, current, next);
1609    migrate_set_error(s, error);
1610}
1611
/*
 * Cancel the outgoing migration.
 *
 * Shuts down the return path, moves a running migration to CANCELLING,
 * shuts down the outgoing channel so that a blocked writer gets out, and
 * completes the cancellation right here when the migration was still in
 * SETUP without an outgoing channel.
 */
void migration_cancel(void)
{
    MigrationState *s = migrate_get_current();
    int old_state ;
    /* Sampled up front: the state is changed further down */
    bool setup = (s->state == MIGRATION_STATUS_SETUP);

    trace_migration_cancel();

    if (migrate_dirty_limit()) {
        qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
    }

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        if (s->rp_state.from_dst_file) {
            /* shutdown the rp socket, so causing the rp thread to shutdown */
            qemu_file_shutdown(s->rp_state.from_dst_file);
        }
    }

    /*
     * migrate_set_state() only succeeds when the state still equals
     * old_state, so retry until CANCELLING is reached or the migration
     * turns out not to be running any more.
     */
    do {
        old_state = s->state;
        if (!migration_is_running()) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_event_set(&s->pause_event);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING) {
        WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
            if (s->to_dst_file) {
                qemu_file_shutdown(s->to_dst_file);
            }
        }
    }

    /*
     * If qmp_migrate_finish has not been called, then there is no path that
     * will complete the cancellation.  Do it now.
     */
    if (setup && !s->to_dst_file) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
        cpr_state_close();
        migrate_hup_delete(s);
    }
}
1667
1668void migration_add_notifier_mode(NotifierWithReturn *notify,
1669                                 MigrationNotifyFunc func, MigMode mode)
1670{
1671    notify->notify = (NotifierWithReturnFunc)func;
1672    notifier_with_return_list_add(&migration_state_notifiers[mode], notify);
1673}
1674
1675void migration_add_notifier(NotifierWithReturn *notify,
1676                            MigrationNotifyFunc func)
1677{
1678    migration_add_notifier_mode(notify, func, MIG_MODE_NORMAL);
1679}
1680
1681void migration_remove_notifier(NotifierWithReturn *notify)
1682{
1683    if (notify->notify) {
1684        notifier_with_return_remove(notify);
1685        notify->notify = NULL;
1686    }
1687}
1688
1689int migration_call_notifiers(MigrationState *s, MigrationEventType type,
1690                             Error **errp)
1691{
1692    MigMode mode = s->parameters.mode;
1693    MigrationEvent e;
1694    int ret;
1695
1696    e.type = type;
1697    ret = notifier_with_return_list_notify(&migration_state_notifiers[mode],
1698                                           &e, errp);
1699    assert(!ret || type == MIG_EVENT_PRECOPY_SETUP);
1700    return ret;
1701}
1702
1703bool migration_has_failed(MigrationState *s)
1704{
1705    return (s->state == MIGRATION_STATUS_CANCELLED ||
1706            s->state == MIGRATION_STATUS_FAILED);
1707}
1708
1709bool migration_in_postcopy(void)
1710{
1711    MigrationState *s = migrate_get_current();
1712
1713    switch (s->state) {
1714    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1715    case MIGRATION_STATUS_POSTCOPY_PAUSED:
1716    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
1717    case MIGRATION_STATUS_POSTCOPY_RECOVER:
1718        return true;
1719    default:
1720        return false;
1721    }
1722}
1723
1724bool migration_postcopy_is_alive(MigrationStatus state)
1725{
1726    switch (state) {
1727    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1728    case MIGRATION_STATUS_POSTCOPY_RECOVER:
1729        return true;
1730    default:
1731        return false;
1732    }
1733}
1734
1735bool migration_in_incoming_postcopy(void)
1736{
1737    PostcopyState ps = postcopy_state_get();
1738
1739    return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
1740}
1741
1742bool migration_incoming_postcopy_advised(void)
1743{
1744    PostcopyState ps = postcopy_state_get();
1745
1746    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
1747}
1748
/*
 * Returns true when background snapshot is enabled and a migration is
 * running.
 */
bool migration_in_bg_snapshot(void)
{
    if (!migrate_background_snapshot()) {
        return false;
    }
    return migration_is_running();
}
1753
1754bool migration_thread_is_self(void)
1755{
1756    MigrationState *s = current_migration;
1757
1758    return qemu_thread_is_self(&s->thread);
1759}
1760
1761bool migrate_mode_is_cpr(MigrationState *s)
1762{
1763    MigMode mode = s->parameters.mode;
1764    return mode == MIG_MODE_CPR_REBOOT ||
1765           mode == MIG_MODE_CPR_TRANSFER;
1766}
1767
1768int migrate_init(MigrationState *s, Error **errp)
1769{
1770    int ret;
1771
1772    ret = qemu_savevm_state_prepare(errp);
1773    if (ret) {
1774        return ret;
1775    }
1776
1777    /*
1778     * Reinitialise all migration state, except
1779     * parameters/capabilities that the user set, and
1780     * locks.
1781     */
1782    s->to_dst_file = NULL;
1783    s->state = MIGRATION_STATUS_NONE;
1784    s->rp_state.from_dst_file = NULL;
1785    s->mbps = 0.0;
1786    s->pages_per_second = 0.0;
1787    s->downtime = 0;
1788    s->expected_downtime = 0;
1789    s->setup_time = 0;
1790    s->start_postcopy = false;
1791    s->migration_thread_running = false;
1792    error_free(s->error);
1793    s->error = NULL;
1794
1795    if (should_send_vmdesc()) {
1796        s->vmdesc = json_writer_new(false);
1797    }
1798
1799    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
1800
1801    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1802    s->total_time = 0;
1803    s->vm_old_state = -1;
1804    s->iteration_initial_bytes = 0;
1805    s->threshold_size = 0;
1806    s->switchover_acked = false;
1807    s->rdma_migration = false;
1808    /*
1809     * set mig_stats memory to zero for a new migration
1810     */
1811    memset(&mig_stats, 0, sizeof(mig_stats));
1812    migration_reset_vfio_bytes_transferred();
1813
1814    return 0;
1815}
1816
1817static bool is_busy(Error **reasonp, Error **errp)
1818{
1819    ERRP_GUARD();
1820
1821    /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
1822    if (runstate_check(RUN_STATE_SAVE_VM) || migration_is_running()) {
1823        error_propagate_prepend(errp, *reasonp,
1824                                "disallowing migration blocker "
1825                                "(migration/snapshot in progress) for: ");
1826        *reasonp = NULL;
1827        return true;
1828    }
1829    return false;
1830}
1831
1832static bool is_only_migratable(Error **reasonp, Error **errp, int modes)
1833{
1834    ERRP_GUARD();
1835
1836    if (only_migratable && (modes & BIT(MIG_MODE_NORMAL))) {
1837        error_propagate_prepend(errp, *reasonp,
1838                                "disallowing migration blocker "
1839                                "(--only-migratable) for: ");
1840        *reasonp = NULL;
1841        return true;
1842    }
1843    return false;
1844}
1845
1846static int get_modes(MigMode mode, va_list ap)
1847{
1848    int modes = 0;
1849
1850    while (mode != -1 && mode != MIG_MODE_ALL) {
1851        assert(mode >= MIG_MODE_NORMAL && mode < MIG_MODE__MAX);
1852        modes |= BIT(mode);
1853        mode = va_arg(ap, MigMode);
1854    }
1855    if (mode == MIG_MODE_ALL) {
1856        modes = BIT(MIG_MODE__MAX) - 1;
1857    }
1858    return modes;
1859}
1860
1861static int add_blockers(Error **reasonp, Error **errp, int modes)
1862{
1863    for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
1864        if (modes & BIT(mode)) {
1865            migration_blockers[mode] = g_slist_prepend(migration_blockers[mode],
1866                                                       *reasonp);
1867        }
1868    }
1869    return 0;
1870}
1871
1872int migrate_add_blocker(Error **reasonp, Error **errp)
1873{
1874    return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_ALL);
1875}
1876
1877int migrate_add_blocker_normal(Error **reasonp, Error **errp)
1878{
1879    return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_NORMAL, -1);
1880}
1881
1882int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...)
1883{
1884    int modes;
1885    va_list ap;
1886
1887    va_start(ap, mode);
1888    modes = get_modes(mode, ap);
1889    va_end(ap);
1890
1891    if (is_only_migratable(reasonp, errp, modes)) {
1892        return -EACCES;
1893    } else if (is_busy(reasonp, errp)) {
1894        return -EBUSY;
1895    }
1896    return add_blockers(reasonp, errp, modes);
1897}
1898
1899int migrate_add_blocker_internal(Error **reasonp, Error **errp)
1900{
1901    int modes = BIT(MIG_MODE__MAX) - 1;
1902
1903    if (is_busy(reasonp, errp)) {
1904        return -EBUSY;
1905    }
1906    return add_blockers(reasonp, errp, modes);
1907}
1908
1909void migrate_del_blocker(Error **reasonp)
1910{
1911    if (*reasonp) {
1912        for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
1913            migration_blockers[mode] = g_slist_remove(migration_blockers[mode],
1914                                                      *reasonp);
1915        }
1916        error_free(*reasonp);
1917        *reasonp = NULL;
1918    }
1919}
1920
1921void qmp_migrate_incoming(const char *uri, bool has_channels,
1922                          MigrationChannelList *channels,
1923                          bool has_exit_on_error, bool exit_on_error,
1924                          Error **errp)
1925{
1926    Error *local_err = NULL;
1927    static bool once = true;
1928    MigrationIncomingState *mis = migration_incoming_get_current();
1929
1930    if (!once) {
1931        error_setg(errp, "The incoming migration has already been started");
1932        return;
1933    }
1934    if (!runstate_check(RUN_STATE_INMIGRATE)) {
1935        error_setg(errp, "'-incoming' was not specified on the command line");
1936        return;
1937    }
1938
1939    if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
1940        return;
1941    }
1942
1943    mis->exit_on_error =
1944        has_exit_on_error ? exit_on_error : INMIGRATE_DEFAULT_EXIT_ON_ERROR;
1945
1946    qemu_start_incoming_migration(uri, has_channels, channels, &local_err);
1947
1948    if (local_err) {
1949        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
1950        error_propagate(errp, local_err);
1951        return;
1952    }
1953
1954    /*
1955     * Making sure MigrationState is available until incoming migration
1956     * completes.
1957     *
1958     * NOTE: QEMU _might_ leak this refcount in some failure paths, but
1959     * that's OK.  This is the minimum change we need to at least making
1960     * sure success case is clean on the refcount.  We can try harder to
1961     * make it accurate for any kind of failures, but it might be an
1962     * overkill and doesn't bring us much benefit.
1963     */
1964    migrate_incoming_ref_outgoing_state();
1965    once = false;
1966}
1967
1968void qmp_migrate_recover(const char *uri, Error **errp)
1969{
1970    MigrationIncomingState *mis = migration_incoming_get_current();
1971
1972    /*
1973     * Don't even bother to use ERRP_GUARD() as it _must_ always be set by
1974     * callers (no one should ignore a recover failure); if there is, it's a
1975     * programming error.
1976     */
1977    assert(errp);
1978
1979    if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
1980        error_setg(errp, "Migrate recover can only be run "
1981                   "when postcopy is paused.");
1982        return;
1983    }
1984
1985    /* If there's an existing transport, release it */
1986    migration_incoming_transport_cleanup(mis);
1987
1988    /*
1989     * Note that this call will never start a real migration; it will
1990     * only re-setup the migration stream and poke existing migration
1991     * to continue using that newly established channel.
1992     */
1993    qemu_start_incoming_migration(uri, false, NULL, errp);
1994}
1995
1996void qmp_migrate_pause(Error **errp)
1997{
1998    MigrationState *ms = migrate_get_current();
1999    MigrationIncomingState *mis = migration_incoming_get_current();
2000    int ret = 0;
2001
2002    if (migration_postcopy_is_alive(ms->state)) {
2003        /* Source side, during postcopy */
2004        Error *error = NULL;
2005
2006        /* Tell the core migration that we're pausing */
2007        error_setg(&error, "Postcopy migration is paused by the user");
2008        migrate_set_error(ms, error);
2009        error_free(error);
2010
2011        qemu_mutex_lock(&ms->qemu_file_lock);
2012        if (ms->to_dst_file) {
2013            ret = qemu_file_shutdown(ms->to_dst_file);
2014        }
2015        qemu_mutex_unlock(&ms->qemu_file_lock);
2016        if (ret) {
2017            error_setg(errp, "Failed to pause source migration");
2018        }
2019
2020        /*
2021         * Kick the migration thread out of any waiting windows (on behalf
2022         * of the rp thread).
2023         */
2024        migration_rp_kick(ms);
2025
2026        return;
2027    }
2028
2029    if (migration_postcopy_is_alive(mis->state)) {
2030        ret = qemu_file_shutdown(mis->from_src_file);
2031        if (ret) {
2032            error_setg(errp, "Failed to pause destination migration");
2033        }
2034        return;
2035    }
2036
2037    error_setg(errp, "migrate-pause is currently only supported "
2038               "during postcopy-active or postcopy-recover state");
2039}
2040
2041bool migration_is_blocked(Error **errp)
2042{
2043    GSList *blockers = migration_blockers[migrate_mode()];
2044
2045    if (qemu_savevm_state_blocked(errp)) {
2046        return true;
2047    }
2048
2049    if (blockers) {
2050        error_propagate(errp, error_copy(blockers->data));
2051        return true;
2052    }
2053
2054    return false;
2055}
2056
2057/* Returns true if continue to migrate, or false if error detected */
2058static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
2059{
2060    if (resume) {
2061        if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
2062            error_setg(errp, "Cannot resume if there is no "
2063                       "paused migration");
2064            return false;
2065        }
2066
2067        /*
2068         * Postcopy recovery won't work well with release-ram
2069         * capability since release-ram will drop the page buffer as
2070         * long as the page is put into the send buffer.  So if there
2071         * is a network failure happened, any page buffers that have
2072         * not yet reached the destination VM but have already been
2073         * sent from the source VM will be lost forever.  Let's refuse
2074         * the client from resuming such a postcopy migration.
2075         * Luckily release-ram was designed to only be used when src
2076         * and destination VMs are on the same host, so it should be
2077         * fine.
2078         */
2079        if (migrate_release_ram()) {
2080            error_setg(errp, "Postcopy recovery cannot work "
2081                       "when release-ram capability is set");
2082            return false;
2083        }
2084
2085        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
2086                          MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
2087
2088        /* This is a resume, skip init status */
2089        return true;
2090    }
2091
2092    if (migration_is_running()) {
2093        error_setg(errp, "There's a migration process in progress");
2094        return false;
2095    }
2096
2097    if (runstate_check(RUN_STATE_INMIGRATE)) {
2098        error_setg(errp, "Guest is waiting for an incoming migration");
2099        return false;
2100    }
2101
2102    if (runstate_check(RUN_STATE_POSTMIGRATE)) {
2103        error_setg(errp, "Can't migrate the vm that was paused due to "
2104                   "previous migration");
2105        return false;
2106    }
2107
2108    if (kvm_hwpoisoned_mem()) {
2109        error_setg(errp, "Can't migrate this vm with hardware poisoned memory, "
2110                   "please reboot the vm and try again");
2111        return false;
2112    }
2113
2114    if (migration_is_blocked(errp)) {
2115        return false;
2116    }
2117
2118    if (migrate_mapped_ram()) {
2119        if (migrate_tls()) {
2120            error_setg(errp, "Cannot use TLS with mapped-ram");
2121            return false;
2122        }
2123
2124        if (migrate_multifd_compression()) {
2125            error_setg(errp, "Cannot use compression with mapped-ram");
2126            return false;
2127        }
2128    }
2129
2130    if (migrate_mode_is_cpr(s)) {
2131        const char *conflict = NULL;
2132
2133        if (migrate_postcopy()) {
2134            conflict = "postcopy";
2135        } else if (migrate_background_snapshot()) {
2136            conflict = "background snapshot";
2137        } else if (migrate_colo()) {
2138            conflict = "COLO";
2139        }
2140
2141        if (conflict) {
2142            error_setg(errp, "Cannot use %s with CPR", conflict);
2143            return false;
2144        }
2145    }
2146
2147    if (migrate_init(s, errp)) {
2148        return false;
2149    }
2150
2151    return true;
2152}
2153
2154static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
2155                               Error **errp);
2156
2157static void migrate_hup_add(MigrationState *s, QIOChannel *ioc, GSourceFunc cb,
2158                            void *opaque)
2159{
2160        s->hup_source = qio_channel_create_watch(ioc, G_IO_HUP);
2161        g_source_set_callback(s->hup_source, cb, opaque, NULL);
2162        g_source_attach(s->hup_source, NULL);
2163}
2164
2165static void migrate_hup_delete(MigrationState *s)
2166{
2167    if (s->hup_source) {
2168        g_source_destroy(s->hup_source);
2169        g_source_unref(s->hup_source);
2170        s->hup_source = NULL;
2171    }
2172}
2173
2174static gboolean qmp_migrate_finish_cb(QIOChannel *channel,
2175                                      GIOCondition cond,
2176                                      void *opaque)
2177{
2178    MigrationAddress *addr = opaque;
2179
2180    qmp_migrate_finish(addr, false, NULL);
2181
2182    cpr_state_close();
2183    migrate_hup_delete(migrate_get_current());
2184    qapi_free_MigrationAddress(addr);
2185    return G_SOURCE_REMOVE;
2186}
2187
2188void qmp_migrate(const char *uri, bool has_channels,
2189                 MigrationChannelList *channels, bool has_detach, bool detach,
2190                 bool has_resume, bool resume, Error **errp)
2191{
2192    bool resume_requested;
2193    Error *local_err = NULL;
2194    MigrationState *s = migrate_get_current();
2195    g_autoptr(MigrationChannel) channel = NULL;
2196    MigrationAddress *addr = NULL;
2197    MigrationChannel *channelv[MIGRATION_CHANNEL_TYPE__MAX] = { NULL };
2198    MigrationChannel *cpr_channel = NULL;
2199
2200    /*
2201     * Having preliminary checks for uri and channel
2202     */
2203    if (!uri == !channels) {
2204        error_setg(errp, "need either 'uri' or 'channels' argument");
2205        return;
2206    }
2207
2208    if (channels) {
2209        for ( ; channels; channels = channels->next) {
2210            MigrationChannelType type = channels->value->channel_type;
2211
2212            if (channelv[type]) {
2213                error_setg(errp, "Channel list has more than one %s entry",
2214                           MigrationChannelType_str(type));
2215                return;
2216            }
2217            channelv[type] = channels->value;
2218        }
2219        cpr_channel = channelv[MIGRATION_CHANNEL_TYPE_CPR];
2220        addr = channelv[MIGRATION_CHANNEL_TYPE_MAIN]->addr;
2221        if (!addr) {
2222            error_setg(errp, "Channel list has no main entry");
2223            return;
2224        }
2225    }
2226
2227    if (uri) {
2228        /* caller uses the old URI syntax */
2229        if (!migrate_uri_parse(uri, &channel, errp)) {
2230            return;
2231        }
2232        addr = channel->addr;
2233    }
2234
2235    /* transport mechanism not suitable for migration? */
2236    if (!migration_transport_compatible(addr, errp)) {
2237        return;
2238    }
2239
2240    if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) {
2241        error_setg(errp, "missing 'cpr' migration channel");
2242        return;
2243    }
2244
2245    resume_requested = has_resume && resume;
2246    if (!migrate_prepare(s, resume_requested, errp)) {
2247        /* Error detected, put into errp */
2248        return;
2249    }
2250
2251    if (cpr_state_save(cpr_channel, &local_err)) {
2252        goto out;
2253    }
2254
2255    /*
2256     * For cpr-transfer, the target may not be listening yet on the migration
2257     * channel, because first it must finish cpr_load_state.  The target tells
2258     * us it is listening by closing the cpr-state socket.  Wait for that HUP
2259     * event before connecting in qmp_migrate_finish.
2260     *
2261     * The HUP could occur because the target fails while reading CPR state,
2262     * in which case the target will not listen for the incoming migration
2263     * connection, so qmp_migrate_finish will fail to connect, and then recover.
2264     */
2265    if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) {
2266        migrate_hup_add(s, cpr_state_ioc(), (GSourceFunc)qmp_migrate_finish_cb,
2267                        QAPI_CLONE(MigrationAddress, addr));
2268
2269    } else {
2270        qmp_migrate_finish(addr, resume_requested, errp);
2271    }
2272
2273out:
2274    if (local_err) {
2275        migration_connect_set_error(s, local_err);
2276        error_propagate(errp, local_err);
2277    }
2278}
2279
2280static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
2281                               Error **errp)
2282{
2283    MigrationState *s = migrate_get_current();
2284    Error *local_err = NULL;
2285
2286    if (!resume_requested) {
2287        if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
2288            return;
2289        }
2290    }
2291
2292    if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
2293        SocketAddress *saddr = &addr->u.socket;
2294        if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
2295            saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
2296            saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
2297            socket_start_outgoing_migration(s, saddr, &local_err);
2298        } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
2299            fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err);
2300        }
2301#ifdef CONFIG_RDMA
2302    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
2303        rdma_start_outgoing_migration(s, &addr->u.rdma, &local_err);
2304#endif
2305    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
2306        exec_start_outgoing_migration(s, addr->u.exec.args, &local_err);
2307    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
2308        file_start_outgoing_migration(s, &addr->u.file, &local_err);
2309    } else {
2310        error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri",
2311                   "a valid migration protocol");
2312        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2313                          MIGRATION_STATUS_FAILED);
2314    }
2315
2316    if (local_err) {
2317        if (!resume_requested) {
2318            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2319        }
2320        migration_connect_set_error(s, local_err);
2321        error_propagate(errp, local_err);
2322        return;
2323    }
2324}
2325
2326void qmp_migrate_cancel(Error **errp)
2327{
2328    /*
2329     * After postcopy migration has started, the source machine is not
2330     * recoverable in case of a migration error. This also means the
2331     * cancel command cannot be used as cancel should allow the
2332     * machine to continue operation.
2333     */
2334    if (migration_in_postcopy()) {
2335        error_setg(errp, "Postcopy migration in progress, cannot cancel.");
2336        return;
2337    }
2338
2339    migration_cancel();
2340}
2341
2342void qmp_migrate_continue(MigrationStatus state, Error **errp)
2343{
2344    MigrationState *s = migrate_get_current();
2345    if (s->state != state) {
2346        error_setg(errp,  "Migration not in expected state: %s",
2347                   MigrationStatus_str(s->state));
2348        return;
2349    }
2350    qemu_event_set(&s->pause_event);
2351}
2352
2353int migration_rp_wait(MigrationState *s)
2354{
2355    /* If migration has failure already, ignore the wait */
2356    if (migrate_has_error(s)) {
2357        return -1;
2358    }
2359
2360    qemu_sem_wait(&s->rp_state.rp_sem);
2361
2362    /* After wait, double check that there's no failure */
2363    if (migrate_has_error(s)) {
2364        return -1;
2365    }
2366
2367    return 0;
2368}
2369
/*
 * Post the return-path semaphore (rp_state.rp_sem), i.e. the one
 * migration_rp_wait() waits on.
 */
void migration_rp_kick(MigrationState *s)
{
    qemu_sem_post(&s->rp_state.rp_sem);
}
2374
/*
 * Per-message metadata for return-path messages, indexed by MIG_RP_MSG_*.
 * source_return_path_thread() checks the payload length announced in a
 * message header against .len and uses .name in its error messages.
 */
static struct rp_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
    [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
    [MIG_RP_MSG_RECV_BITMAP]    = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_RP_MSG_RESUME_ACK]     = { .len =  4, .name = "RESUME_ACK" },
    [MIG_RP_MSG_SWITCHOVER_ACK] = { .len =  0, .name = "SWITCHOVER_ACK" },
    [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
};
2389
2390/*
2391 * Process a request for pages received on the return path,
2392 * We're allowed to send more than requested (e.g. to round to our page size)
2393 * and we don't need to send pages that have already been sent.
2394 */
2395static void
2396migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
2397                            ram_addr_t start, size_t len, Error **errp)
2398{
2399    long our_host_ps = qemu_real_host_page_size();
2400
2401    trace_migrate_handle_rp_req_pages(rbname, start, len);
2402
2403    /*
2404     * Since we currently insist on matching page sizes, just sanity check
2405     * we're being asked for whole host pages.
2406     */
2407    if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
2408        !QEMU_IS_ALIGNED(len, our_host_ps)) {
2409        error_setg(errp, "MIG_RP_MSG_REQ_PAGES: Misaligned page request, start:"
2410                   RAM_ADDR_FMT " len: %zd", start, len);
2411        return;
2412    }
2413
2414    ram_save_queue_pages(rbname, start, len, errp);
2415}
2416
2417static bool migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name,
2418                                          Error **errp)
2419{
2420    RAMBlock *block = qemu_ram_block_by_name(block_name);
2421
2422    if (!block) {
2423        error_setg(errp, "MIG_RP_MSG_RECV_BITMAP has invalid block name '%s'",
2424                   block_name);
2425        return false;
2426    }
2427
2428    /* Fetch the received bitmap and refresh the dirty bitmap */
2429    return ram_dirty_bitmap_reload(s, block, errp);
2430}
2431
2432static bool migrate_handle_rp_resume_ack(MigrationState *s,
2433                                         uint32_t value, Error **errp)
2434{
2435    trace_source_return_path_thread_resume_ack(value);
2436
2437    if (value != MIGRATION_RESUME_ACK_VALUE) {
2438        error_setg(errp, "illegal resume_ack value %"PRIu32, value);
2439        return false;
2440    }
2441
2442    /* Now both sides are active. */
2443    migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
2444                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
2445
2446    /* Notify send thread that time to continue send pages */
2447    migration_rp_kick(s);
2448
2449    return true;
2450}
2451
/*
 * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if it
 * exists) in a safe way.
 *
 * The from_dst_file pointer is detached from @ms under qemu_file_lock and
 * closed only after the lock has been dropped.
 */
static void migration_release_dst_files(MigrationState *ms)
{
    QEMUFile *file = NULL;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        /*
         * Reset the from_dst_file pointer first before releasing it, as we
         * can't block within lock section
         */
        file = ms->rp_state.from_dst_file;
        ms->rp_state.from_dst_file = NULL;
    }

    /*
     * Do the same to the postcopy fast path socket too, if there is one.
     * No locking needed because this qemufile should only be managed by
     * the return path thread.
     */
    if (ms->postcopy_qemufile_src) {
        migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
        qemu_file_shutdown(ms->postcopy_qemufile_src);
        qemu_fclose(ms->postcopy_qemufile_src);
        ms->postcopy_qemufile_src = NULL;
    }

    /* Close the detached return-path file outside the locked section */
    qemu_fclose(file);
}
2483
2484/*
2485 * Handles messages sent on the return path towards the source VM
2486 *
2487 */
2488static void *source_return_path_thread(void *opaque)
2489{
2490    MigrationState *ms = opaque;
2491    QEMUFile *rp = ms->rp_state.from_dst_file;
2492    uint16_t header_len, header_type;
2493    uint8_t buf[512];
2494    uint32_t tmp32, sibling_error;
2495    ram_addr_t start = 0; /* =0 to silence warning */
2496    size_t  len = 0, expected_len;
2497    Error *err = NULL;
2498    int res;
2499
2500    trace_source_return_path_thread_entry();
2501    rcu_register_thread();
2502
2503    while (migration_is_running()) {
2504        trace_source_return_path_thread_loop_top();
2505
2506        header_type = qemu_get_be16(rp);
2507        header_len = qemu_get_be16(rp);
2508
2509        if (qemu_file_get_error(rp)) {
2510            qemu_file_get_error_obj(rp, &err);
2511            goto out;
2512        }
2513
2514        if (header_type >= MIG_RP_MSG_MAX ||
2515            header_type == MIG_RP_MSG_INVALID) {
2516            error_setg(&err, "Received invalid message 0x%04x length 0x%04x",
2517                       header_type, header_len);
2518            goto out;
2519        }
2520
2521        if ((rp_cmd_args[header_type].len != -1 &&
2522            header_len != rp_cmd_args[header_type].len) ||
2523            header_len > sizeof(buf)) {
2524            error_setg(&err, "Received '%s' message (0x%04x) with"
2525                       "incorrect length %d expecting %zu",
2526                       rp_cmd_args[header_type].name, header_type, header_len,
2527                       (size_t)rp_cmd_args[header_type].len);
2528            goto out;
2529        }
2530
2531        /* We know we've got a valid header by this point */
2532        res = qemu_get_buffer(rp, buf, header_len);
2533        if (res != header_len) {
2534            error_setg(&err, "Failed reading data for message 0x%04x"
2535                       " read %d expected %d",
2536                       header_type, res, header_len);
2537            goto out;
2538        }
2539
2540        /* OK, we have the message and the data */
2541        switch (header_type) {
2542        case MIG_RP_MSG_SHUT:
2543            sibling_error = ldl_be_p(buf);
2544            trace_source_return_path_thread_shut(sibling_error);
2545            if (sibling_error) {
2546                error_setg(&err, "Sibling indicated error %d", sibling_error);
2547            }
2548            /*
2549             * We'll let the main thread deal with closing the RP
2550             * we could do a shutdown(2) on it, but we're the only user
2551             * anyway, so there's nothing gained.
2552             */
2553            goto out;
2554
2555        case MIG_RP_MSG_PONG:
2556            tmp32 = ldl_be_p(buf);
2557            trace_source_return_path_thread_pong(tmp32);
2558            qemu_sem_post(&ms->rp_state.rp_pong_acks);
2559            break;
2560
2561        case MIG_RP_MSG_REQ_PAGES:
2562            start = ldq_be_p(buf);
2563            len = ldl_be_p(buf + 8);
2564            migrate_handle_rp_req_pages(ms, NULL, start, len, &err);
2565            if (err) {
2566                goto out;
2567            }
2568            break;
2569
2570        case MIG_RP_MSG_REQ_PAGES_ID:
2571            expected_len = 12 + 1; /* header + termination */
2572
2573            if (header_len >= expected_len) {
2574                start = ldq_be_p(buf);
2575                len = ldl_be_p(buf + 8);
2576                /* Now we expect an idstr */
2577                tmp32 = buf[12]; /* Length of the following idstr */
2578                buf[13 + tmp32] = '\0';
2579                expected_len += tmp32;
2580            }
2581            if (header_len != expected_len) {
2582                error_setg(&err, "Req_Page_id with length %d expecting %zd",
2583                           header_len, expected_len);
2584                goto out;
2585            }
2586            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len,
2587                                        &err);
2588            if (err) {
2589                goto out;
2590            }
2591            break;
2592
2593        case MIG_RP_MSG_RECV_BITMAP:
2594            if (header_len < 1) {
2595                error_setg(&err, "MIG_RP_MSG_RECV_BITMAP missing block name");
2596                goto out;
2597            }
2598            /* Format: len (1B) + idstr (<255B). This ends the idstr. */
2599            buf[buf[0] + 1] = '\0';
2600            if (!migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) {
2601                goto out;
2602            }
2603            break;
2604
2605        case MIG_RP_MSG_RESUME_ACK:
2606            tmp32 = ldl_be_p(buf);
2607            if (!migrate_handle_rp_resume_ack(ms, tmp32, &err)) {
2608                goto out;
2609            }
2610            break;
2611
2612        case MIG_RP_MSG_SWITCHOVER_ACK:
2613            ms->switchover_acked = true;
2614            trace_source_return_path_thread_switchover_acked();
2615            break;
2616
2617        default:
2618            break;
2619        }
2620    }
2621
2622out:
2623    if (err) {
2624        migrate_set_error(ms, err);
2625        error_free(err);
2626        trace_source_return_path_thread_bad_end();
2627    }
2628
2629    if (ms->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
2630        /*
2631         * this will be extremely unlikely: that we got yet another network
2632         * issue during recovering of the 1st network failure.. during this
2633         * period the main migration thread can be waiting on rp_sem for
2634         * this thread to sync with the other side.
2635         *
2636         * When this happens, explicitly kick the migration thread out of
2637         * RECOVER stage and back to PAUSED, so the admin can try
2638         * everything again.
2639         */
2640        migration_rp_kick(ms);
2641    }
2642
2643    trace_source_return_path_thread_end();
2644    rcu_unregister_thread();
2645
2646    return NULL;
2647}
2648
2649static int open_return_path_on_source(MigrationState *ms)
2650{
2651    ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
2652    if (!ms->rp_state.from_dst_file) {
2653        return -1;
2654    }
2655
2656    trace_open_return_path_on_source();
2657
2658    qemu_thread_create(&ms->rp_state.rp_thread, MIGRATION_THREAD_SRC_RETURN,
2659                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
2660    ms->rp_state.rp_thread_created = true;
2661
2662    trace_open_return_path_on_source_continue();
2663
2664    return 0;
2665}
2666
/*
 * Join the return path thread, if one was created, and release the
 * return-path files.
 *
 * Return true if error detected, or false otherwise.  Returns false
 * immediately when no return path thread was created.
 */
static bool close_return_path_on_source(MigrationState *ms)
{
    if (!ms->rp_state.rp_thread_created) {
        return false;
    }

    trace_migration_return_path_end_before();

    /*
     * If this is a normal exit then the destination will send a SHUT
     * and the rp_thread will exit, however if there's an error we
     * need to cause it to exit. shutdown(2), if we have it, will
     * cause it to unblock if it's stuck waiting for the destination.
     */
    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        if (migrate_has_error(ms) && ms->rp_state.from_dst_file) {
            qemu_file_shutdown(ms->rp_state.from_dst_file);
        }
    }

    /* The files are released only after the thread has been joined */
    qemu_thread_join(&ms->rp_state.rp_thread);
    ms->rp_state.rp_thread_created = false;
    migration_release_dst_files(ms);
    trace_migration_return_path_end_after();

    /* Return path will persist the error in MigrationState when quit */
    return migrate_has_error(ms);
}
2696
/*
 * Block on rp_state.rp_pong_acks, the semaphore that
 * source_return_path_thread() posts for each MIG_RP_MSG_PONG it receives.
 */
static inline void
migration_wait_main_channel(MigrationState *ms)
{
    /* Wait until one PONG message received */
    qemu_sem_wait(&ms->rp_state.rp_pong_acks);
}
2703
2704/*
2705 * Switch from normal iteration to postcopy
2706 * Returns non-0 on error
2707 */
2708static int postcopy_start(MigrationState *ms, Error **errp)
2709{
2710    int ret;
2711    QIOChannelBuffer *bioc;
2712    QEMUFile *fb;
2713
2714    /*
2715     * Now we're 100% sure to switch to postcopy, so JSON writer won't be
2716     * useful anymore.  Free the resources early if it is there.  Clearing
2717     * the vmdesc also means any follow up vmstate_save()s will start to
2718     * skip all JSON operations, which can shrink postcopy downtime.
2719     */
2720    migration_cleanup_json_writer(ms);
2721
2722    if (migrate_postcopy_preempt()) {
2723        migration_wait_main_channel(ms);
2724        if (postcopy_preempt_establish_channel(ms)) {
2725            if (ms->state != MIGRATION_STATUS_CANCELLING) {
2726                migrate_set_state(&ms->state, ms->state,
2727                                  MIGRATION_STATUS_FAILED);
2728            }
2729            error_setg(errp, "%s: Failed to establish preempt channel",
2730                       __func__);
2731            return -1;
2732        }
2733    }
2734
2735    if (!qemu_savevm_state_postcopy_prepare(ms->to_dst_file, errp)) {
2736        return -1;
2737    }
2738
2739    trace_postcopy_start();
2740    bql_lock();
2741    trace_postcopy_start_set_run();
2742
2743    ret = migration_stop_vm(ms, RUN_STATE_FINISH_MIGRATE);
2744    if (ret < 0) {
2745        error_setg_errno(errp, -ret, "%s: Failed to stop the VM", __func__);
2746        goto fail;
2747    }
2748
2749    if (!migration_switchover_start(ms, errp)) {
2750        goto fail;
2751    }
2752
2753    /*
2754     * Cause any non-postcopiable, but iterative devices to
2755     * send out their final data.
2756     */
2757    ret = qemu_savevm_state_complete_precopy_iterable(ms->to_dst_file, true);
2758    if (ret) {
2759        error_setg(errp, "Postcopy save non-postcopiable iterables failed");
2760        goto fail;
2761    }
2762
2763    /*
2764     * in Finish migrate and with the io-lock held everything should
2765     * be quiet, but we've potentially still got dirty pages and we
2766     * need to tell the destination to throw any pages it's already received
2767     * that are dirty
2768     */
2769    if (migrate_postcopy_ram()) {
2770        ram_postcopy_send_discard_bitmap(ms);
2771    }
2772
2773    if (migrate_postcopy_ram()) {
2774        /* Ping just for debugging, helps line traces up */
2775        qemu_savevm_send_ping(ms->to_dst_file, 2);
2776    }
2777
2778    /*
2779     * While loading the device state we may trigger page transfer
2780     * requests and the fd must be free to process those, and thus
2781     * the destination must read the whole device state off the fd before
2782     * it starts processing it.  Unfortunately the ad-hoc migration format
2783     * doesn't allow the destination to know the size to read without fully
2784     * parsing it through each devices load-state code (especially the open
2785     * coded devices that use get/put).
2786     * So we wrap the device state up in a package with a length at the start;
2787     * to do this we use a qemu_buf to hold the whole of the device state.
2788     */
2789    bioc = qio_channel_buffer_new(4096);
2790    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
2791    fb = qemu_file_new_output(QIO_CHANNEL(bioc));
2792    object_unref(OBJECT(bioc));
2793
2794    /*
2795     * Make sure the receiver can get incoming pages before we send the rest
2796     * of the state
2797     */
2798    qemu_savevm_send_postcopy_listen(fb);
2799
2800    ret = qemu_savevm_state_complete_precopy_non_iterable(fb, true);
2801    if (ret) {
2802        error_setg(errp, "Postcopy save non-iterable device states failed");
2803        goto fail_closefb;
2804    }
2805
2806    if (migrate_postcopy_ram()) {
2807        qemu_savevm_send_ping(fb, 3);
2808    }
2809
2810    qemu_savevm_send_postcopy_run(fb);
2811
2812    /* <><> end of stuff going into the package */
2813
2814    /* Last point of recovery; as soon as we send the package the destination
2815     * can open devices and potentially start running.
2816     * Lets just check again we've not got any errors.
2817     */
2818    ret = qemu_file_get_error(ms->to_dst_file);
2819    if (ret) {
2820        error_setg(errp, "postcopy_start: Migration stream errored (pre package)");
2821        goto fail_closefb;
2822    }
2823
2824    /* Now send that blob */
2825    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
2826        error_setg(errp, "%s: Failed to send packaged data", __func__);
2827        goto fail_closefb;
2828    }
2829    qemu_fclose(fb);
2830
2831    /* Send a notify to give a chance for anything that needs to happen
2832     * at the transition to postcopy and after the device state; in particular
2833     * spice needs to trigger a transition now
2834     */
2835    migration_call_notifiers(ms, MIG_EVENT_PRECOPY_DONE, NULL);
2836
2837    migration_downtime_end(ms);
2838
2839    if (migrate_postcopy_ram()) {
2840        /*
2841         * Although this ping is just for debug, it could potentially be
2842         * used for getting a better measurement of downtime at the source.
2843         */
2844        qemu_savevm_send_ping(ms->to_dst_file, 4);
2845    }
2846
2847    if (migrate_release_ram()) {
2848        ram_postcopy_migrated_memory_release(ms);
2849    }
2850
2851    ret = qemu_file_get_error(ms->to_dst_file);
2852    if (ret) {
2853        error_setg_errno(errp, -ret, "postcopy_start: Migration stream error");
2854        goto fail;
2855    }
2856    trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
2857
2858    /*
2859     * Now postcopy officially started, switch to postcopy bandwidth that
2860     * user specified.
2861     */
2862    migration_rate_set(migrate_max_postcopy_bandwidth());
2863
2864    /* Now, switchover looks all fine, switching to postcopy-active */
2865    migrate_set_state(&ms->state, MIGRATION_STATUS_DEVICE,
2866                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
2867
2868    bql_unlock();
2869
2870    return ret;
2871
2872fail_closefb:
2873    qemu_fclose(fb);
2874fail:
2875    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2876                          MIGRATION_STATUS_FAILED);
2877    migration_block_activate(NULL);
2878    migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL);
2879    bql_unlock();
2880    return -1;
2881}
2882
2883/**
2884 * @migration_switchover_prepare: Start VM switchover procedure
2885 *
2886 * @s: The migration state object pointer
2887 *
2888 * Prepares for the switchover, depending on "pause-before-switchover"
2889 * capability.
2890 *
2891 * If cap set, state machine goes like:
2892 *   [postcopy-]active -> pre-switchover -> device
2893 *
2894 * If cap not set:
2895 *   [postcopy-]active -> device
2896 *
2897 * Returns: true on success, false on interruptions.
2898 */
2899static bool migration_switchover_prepare(MigrationState *s)
2900{
2901    /* Concurrent cancellation?  Quit */
2902    if (s->state == MIGRATION_STATUS_CANCELLING) {
2903        return false;
2904    }
2905
2906    /*
2907     * No matter precopy or postcopy, since we still hold BQL it must not
2908     * change concurrently to CANCELLING, so it must be either ACTIVE or
2909     * POSTCOPY_ACTIVE.
2910     */
2911    assert(migration_is_active());
2912
2913    /* If the pre stage not requested, directly switch to DEVICE */
2914    if (!migrate_pause_before_switchover()) {
2915        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_DEVICE);
2916        return true;
2917    }
2918
2919    /*
2920     * Since leaving this state is not atomic with setting the event
2921     * it's possible that someone could have issued multiple migrate_continue
2922     * and the event is incorrectly set at this point so reset it.
2923     */
2924    qemu_event_reset(&s->pause_event);
2925
2926    /* Update [POSTCOPY_]ACTIVE to PRE_SWITCHOVER */
2927    migrate_set_state(&s->state, s->state, MIGRATION_STATUS_PRE_SWITCHOVER);
2928    bql_unlock();
2929
2930    qemu_event_wait(&s->pause_event);
2931
2932    bql_lock();
2933    /*
2934     * After BQL released and retaken, the state can be CANCELLING if it
2935     * happend during sem_wait().. Only change the state if it's still
2936     * pre-switchover.
2937     */
2938    migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
2939                      MIGRATION_STATUS_DEVICE);
2940
2941    return s->state == MIGRATION_STATUS_DEVICE;
2942}
2943
2944static bool migration_switchover_start(MigrationState *s, Error **errp)
2945{
2946    ERRP_GUARD();
2947
2948    if (!migration_switchover_prepare(s)) {
2949        error_setg(errp, "Switchover is interrupted");
2950        return false;
2951    }
2952
2953    /* Inactivate disks except in COLO */
2954    if (!migrate_colo()) {
2955        /*
2956         * Inactivate before sending QEMU_VM_EOF so that the
2957         * bdrv_activate_all() on the other end won't fail.
2958         */
2959        if (!migration_block_inactivate()) {
2960            error_setg(errp, "Block inactivate failed during switchover");
2961            return false;
2962        }
2963    }
2964
2965    migration_rate_set(RATE_LIMIT_DISABLED);
2966
2967    precopy_notify_complete();
2968
2969    qemu_savevm_maybe_send_switchover_start(s->to_dst_file);
2970
2971    return true;
2972}
2973
2974static int migration_completion_precopy(MigrationState *s)
2975{
2976    int ret;
2977
2978    bql_lock();
2979
2980    if (!migrate_mode_is_cpr(s)) {
2981        ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
2982        if (ret < 0) {
2983            goto out_unlock;
2984        }
2985    }
2986
2987    if (!migration_switchover_start(s, NULL)) {
2988        ret = -EFAULT;
2989        goto out_unlock;
2990    }
2991
2992    ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false);
2993out_unlock:
2994    bql_unlock();
2995    return ret;
2996}
2997
2998static void migration_completion_postcopy(MigrationState *s)
2999{
3000    trace_migration_completion_postcopy_end();
3001
3002    bql_lock();
3003    qemu_savevm_state_complete_postcopy(s->to_dst_file);
3004    bql_unlock();
3005
3006    /*
3007     * Shutdown the postcopy fast path thread.  This is only needed when dest
3008     * QEMU binary is old (7.1/7.2).  QEMU 8.0+ doesn't need this.
3009     */
3010    if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
3011        postcopy_preempt_shutdown_file(s);
3012    }
3013
3014    trace_migration_completion_postcopy_end_after_complete();
3015}
3016
3017/**
3018 * migration_completion: Used by migration_thread when there's not much left.
3019 *   The caller 'breaks' the loop when this returns.
3020 *
3021 * @s: Current migration state
3022 */
3023static void migration_completion(MigrationState *s)
3024{
3025    int ret = 0;
3026    Error *local_err = NULL;
3027
3028    if (s->state == MIGRATION_STATUS_ACTIVE) {
3029        ret = migration_completion_precopy(s);
3030    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3031        migration_completion_postcopy(s);
3032    } else {
3033        ret = -1;
3034    }
3035
3036    if (ret < 0) {
3037        goto fail;
3038    }
3039
3040    if (close_return_path_on_source(s)) {
3041        goto fail;
3042    }
3043
3044    if (qemu_file_get_error(s->to_dst_file)) {
3045        trace_migration_completion_file_err();
3046        goto fail;
3047    }
3048
3049    if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
3050        /* COLO does not support postcopy */
3051        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3052                          MIGRATION_STATUS_COLO);
3053    } else {
3054        migration_completion_end(s);
3055    }
3056
3057    return;
3058
3059fail:
3060    if (qemu_file_get_error_obj(s->to_dst_file, &local_err)) {
3061        migrate_set_error(s, local_err);
3062        error_free(local_err);
3063    } else if (ret) {
3064        error_setg_errno(&local_err, -ret, "Error in migration completion");
3065        migrate_set_error(s, local_err);
3066        error_free(local_err);
3067    }
3068
3069    if (s->state != MIGRATION_STATUS_CANCELLING) {
3070        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
3071    }
3072}
3073
3074/**
3075 * bg_migration_completion: Used by bg_migration_thread when after all the
3076 *   RAM has been saved. The caller 'breaks' the loop when this returns.
3077 *
3078 * @s: Current migration state
3079 */
3080static void bg_migration_completion(MigrationState *s)
3081{
3082    int current_active_state = s->state;
3083
3084    if (s->state == MIGRATION_STATUS_ACTIVE) {
3085        /*
3086         * By this moment we have RAM content saved into the migration stream.
3087         * The next step is to flush the non-RAM content (device state)
3088         * right after the ram content. The device state has been stored into
3089         * the temporary buffer before RAM saving started.
3090         */
3091        qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
3092        qemu_fflush(s->to_dst_file);
3093    } else if (s->state == MIGRATION_STATUS_CANCELLING) {
3094        return;
3095    }
3096
3097    if (qemu_file_get_error(s->to_dst_file)) {
3098        trace_migration_completion_file_err();
3099        goto fail;
3100    }
3101
3102    migration_completion_end(s);
3103    return;
3104
3105fail:
3106    migrate_set_state(&s->state, current_active_state,
3107                      MIGRATION_STATUS_FAILED);
3108}
3109
/* Outcome of an error check performed by the migration thread */
typedef enum MigThrError {
    MIG_THR_ERR_NONE = 0,       /* No error detected */
    MIG_THR_ERR_RECOVERED = 1,  /* Detected error, but resumed successfully */
    MIG_THR_ERR_FATAL = 2,      /* Detected fatal error, need to exit */
} MigThrError;
3118
3119static int postcopy_resume_handshake(MigrationState *s)
3120{
3121    qemu_savevm_send_postcopy_resume(s->to_dst_file);
3122
3123    while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3124        if (migration_rp_wait(s)) {
3125            return -1;
3126        }
3127    }
3128
3129    if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3130        return 0;
3131    }
3132
3133    return -1;
3134}
3135
3136/* Return zero if success, or <0 for error */
3137static int postcopy_do_resume(MigrationState *s)
3138{
3139    int ret;
3140
3141    /*
3142     * Call all the resume_prepare() hooks, so that modules can be
3143     * ready for the migration resume.
3144     */
3145    ret = qemu_savevm_state_resume_prepare(s);
3146    if (ret) {
3147        error_report("%s: resume_prepare() failure detected: %d",
3148                     __func__, ret);
3149        return ret;
3150    }
3151
3152    /*
3153     * If preempt is enabled, re-establish the preempt channel.  Note that
3154     * we do it after resume prepare to make sure the main channel will be
3155     * created before the preempt channel.  E.g. with weak network, the
3156     * dest QEMU may get messed up with the preempt and main channels on
3157     * the order of connection setup.  This guarantees the correct order.
3158     */
3159    ret = postcopy_preempt_establish_channel(s);
3160    if (ret) {
3161        error_report("%s: postcopy_preempt_establish_channel(): %d",
3162                     __func__, ret);
3163        return ret;
3164    }
3165
3166    /*
3167     * Last handshake with destination on the resume (destination will
3168     * switch to postcopy-active afterwards)
3169     */
3170    ret = postcopy_resume_handshake(s);
3171    if (ret) {
3172        error_report("%s: handshake failed: %d", __func__, ret);
3173        return ret;
3174    }
3175
3176    return 0;
3177}
3178
3179/*
3180 * We don't return until we are in a safe state to continue current
3181 * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
3182 * MIG_THR_ERR_FATAL if unrecovery failure happened.
3183 */
3184static MigThrError postcopy_pause(MigrationState *s)
3185{
3186    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
3187
3188    while (true) {
3189        QEMUFile *file;
3190
3191        /*
3192         * We're already pausing, so ignore any errors on the return
3193         * path and just wait for the thread to finish. It will be
3194         * re-created when we resume.
3195         */
3196        close_return_path_on_source(s);
3197
3198        /*
3199         * Current channel is possibly broken. Release it.  Note that this is
3200         * guaranteed even without lock because to_dst_file should only be
3201         * modified by the migration thread.  That also guarantees that the
3202         * unregister of yank is safe too without the lock.  It should be safe
3203         * even to be within the qemu_file_lock, but we didn't do that to avoid
3204         * taking more mutex (yank_lock) within qemu_file_lock.  TL;DR: we make
3205         * the qemu_file_lock critical section as small as possible.
3206         */
3207        assert(s->to_dst_file);
3208        migration_ioc_unregister_yank_from_file(s->to_dst_file);
3209        qemu_mutex_lock(&s->qemu_file_lock);
3210        file = s->to_dst_file;
3211        s->to_dst_file = NULL;
3212        qemu_mutex_unlock(&s->qemu_file_lock);
3213
3214        qemu_file_shutdown(file);
3215        qemu_fclose(file);
3216
3217        migrate_set_state(&s->state, s->state,
3218                          MIGRATION_STATUS_POSTCOPY_PAUSED);
3219
3220        error_report("Detected IO failure for postcopy. "
3221                     "Migration paused.");
3222
3223        /*
3224         * We wait until things fixed up. Then someone will setup the
3225         * status back for us.
3226         */
3227        do {
3228            qemu_sem_wait(&s->postcopy_pause_sem);
3229        } while (postcopy_is_paused(s->state));
3230
3231        if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3232            /* Woken up by a recover procedure. Give it a shot */
3233
3234            /* Do the resume logic */
3235            if (postcopy_do_resume(s) == 0) {
3236                /* Let's continue! */
3237                trace_postcopy_pause_continued();
3238                return MIG_THR_ERR_RECOVERED;
3239            } else {
3240                /*
3241                 * Something wrong happened during the recovery, let's
3242                 * pause again. Pause is always better than throwing
3243                 * data away.
3244                 */
3245                continue;
3246            }
3247        } else {
3248            /* This is not right... Time to quit. */
3249            return MIG_THR_ERR_FATAL;
3250        }
3251    }
3252}
3253
3254void migration_file_set_error(int ret, Error *err)
3255{
3256    MigrationState *s = current_migration;
3257
3258    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
3259        if (s->to_dst_file) {
3260            qemu_file_set_error_obj(s->to_dst_file, ret, err);
3261        } else if (err) {
3262            error_report_err(err);
3263        }
3264    }
3265}
3266
3267static MigThrError migration_detect_error(MigrationState *s)
3268{
3269    int ret;
3270    int state = s->state;
3271    Error *local_error = NULL;
3272
3273    if (state == MIGRATION_STATUS_CANCELLING ||
3274        state == MIGRATION_STATUS_CANCELLED) {
3275        /* End the migration, but don't set the state to failed */
3276        return MIG_THR_ERR_FATAL;
3277    }
3278
3279    /*
3280     * Try to detect any file errors.  Note that postcopy_qemufile_src will
3281     * be NULL when postcopy preempt is not enabled.
3282     */
3283    ret = qemu_file_get_error_obj_any(s->to_dst_file,
3284                                      s->postcopy_qemufile_src,
3285                                      &local_error);
3286    if (!ret) {
3287        /* Everything is fine */
3288        assert(!local_error);
3289        return MIG_THR_ERR_NONE;
3290    }
3291
3292    if (local_error) {
3293        migrate_set_error(s, local_error);
3294        error_free(local_error);
3295    }
3296
3297    if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
3298        /*
3299         * For postcopy, we allow the network to be down for a
3300         * while. After that, it can be continued by a
3301         * recovery phase.
3302         */
3303        return postcopy_pause(s);
3304    } else {
3305        /*
3306         * For precopy (or postcopy with error outside IO), we fail
3307         * with no time.
3308         */
3309        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
3310        trace_migration_thread_file_err();
3311
3312        /* Time to stop the migration, now. */
3313        return MIG_THR_ERR_FATAL;
3314    }
3315}
3316
3317static void migration_completion_end(MigrationState *s)
3318{
3319    uint64_t bytes = migration_transferred_bytes();
3320    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3321    int64_t transfer_time;
3322
3323    /*
3324     * Take the BQL here so that query-migrate on the QMP thread sees:
3325     * - atomic update of s->total_time and s->mbps;
3326     * - correct ordering of s->mbps update vs. s->state;
3327     */
3328    bql_lock();
3329    migration_downtime_end(s);
3330    s->total_time = end_time - s->start_time;
3331    transfer_time = s->total_time - s->setup_time;
3332    if (transfer_time) {
3333        s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
3334    }
3335
3336    migrate_set_state(&s->state, s->state,
3337                      MIGRATION_STATUS_COMPLETED);
3338    bql_unlock();
3339}
3340
3341static void update_iteration_initial_status(MigrationState *s)
3342{
3343    /*
3344     * Update these three fields at the same time to avoid mismatch info lead
3345     * wrong speed calculation.
3346     */
3347    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3348    s->iteration_initial_bytes = migration_transferred_bytes();
3349    s->iteration_initial_pages = ram_get_total_transferred_pages();
3350}
3351
3352static void migration_update_counters(MigrationState *s,
3353                                      int64_t current_time)
3354{
3355    uint64_t transferred, transferred_pages, time_spent;
3356    uint64_t current_bytes; /* bytes transferred since the beginning */
3357    uint64_t switchover_bw;
3358    /* Expected bandwidth when switching over to destination QEMU */
3359    double expected_bw_per_ms;
3360    double bandwidth;
3361
3362    if (current_time < s->iteration_start_time + BUFFER_DELAY) {
3363        return;
3364    }
3365
3366    switchover_bw = migrate_avail_switchover_bandwidth();
3367    current_bytes = migration_transferred_bytes();
3368    transferred = current_bytes - s->iteration_initial_bytes;
3369    time_spent = current_time - s->iteration_start_time;
3370    bandwidth = (double)transferred / time_spent;
3371
3372    if (switchover_bw) {
3373        /*
3374         * If the user specified a switchover bandwidth, let's trust the
3375         * user so that can be more accurate than what we estimated.
3376         */
3377        expected_bw_per_ms = switchover_bw / 1000;
3378    } else {
3379        /* If the user doesn't specify bandwidth, we use the estimated */
3380        expected_bw_per_ms = bandwidth;
3381    }
3382
3383    s->threshold_size = expected_bw_per_ms * migrate_downtime_limit();
3384
3385    s->mbps = (((double) transferred * 8.0) /
3386               ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
3387
3388    transferred_pages = ram_get_total_transferred_pages() -
3389                            s->iteration_initial_pages;
3390    s->pages_per_second = (double) transferred_pages /
3391                             (((double) time_spent / 1000.0));
3392
3393    /*
3394     * if we haven't sent anything, we don't want to
3395     * recalculate. 10000 is a small enough number for our purposes
3396     */
3397    if (stat64_get(&mig_stats.dirty_pages_rate) &&
3398        transferred > 10000) {
3399        s->expected_downtime =
3400            stat64_get(&mig_stats.dirty_bytes_last_sync) / expected_bw_per_ms;
3401    }
3402
3403    migration_rate_reset();
3404
3405    update_iteration_initial_status(s);
3406
3407    trace_migrate_transferred(transferred, time_spent,
3408                              /* Both in unit bytes/ms */
3409                              bandwidth, switchover_bw / 1000,
3410                              s->threshold_size);
3411}
3412
3413static bool migration_can_switchover(MigrationState *s)
3414{
3415    if (!migrate_switchover_ack()) {
3416        return true;
3417    }
3418
3419    /* No reason to wait for switchover ACK if VM is stopped */
3420    if (!runstate_is_running()) {
3421        return true;
3422    }
3423
3424    return s->switchover_acked;
3425}
3426
/* Migration thread iteration status */
typedef enum {
    /* Resume current iteration */
    MIG_ITERATE_RESUME,
    /* Skip current iteration */
    MIG_ITERATE_SKIP,
    /* Break the loop */
    MIG_ITERATE_BREAK,
} MigIterateState;
3433
3434/*
3435 * Return true if continue to the next iteration directly, false
3436 * otherwise.
3437 */
3438static MigIterateState migration_iteration_run(MigrationState *s)
3439{
3440    uint64_t must_precopy, can_postcopy, pending_size;
3441    Error *local_err = NULL;
3442    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
3443    bool can_switchover = migration_can_switchover(s);
3444    bool complete_ready;
3445
3446    /* Fast path - get the estimated amount of pending data */
3447    qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
3448    pending_size = must_precopy + can_postcopy;
3449    trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);
3450
3451    if (in_postcopy) {
3452        /*
3453         * Iterate in postcopy until all pending data flushed.  Note that
3454         * postcopy completion doesn't rely on can_switchover, because when
3455         * POSTCOPY_ACTIVE it means switchover already happened.
3456         */
3457        complete_ready = !pending_size;
3458    } else {
3459        /*
3460         * Exact pending reporting is only needed for precopy.  Taking RAM
3461         * as example, there'll be no extra dirty information after
3462         * postcopy started, so ESTIMATE should always match with EXACT
3463         * during postcopy phase.
3464         */
3465        if (pending_size < s->threshold_size) {
3466            qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
3467            pending_size = must_precopy + can_postcopy;
3468            trace_migrate_pending_exact(pending_size, must_precopy,
3469                                        can_postcopy);
3470        }
3471
3472        /* Should we switch to postcopy now? */
3473        if (must_precopy <= s->threshold_size &&
3474            can_switchover && qatomic_read(&s->start_postcopy)) {
3475            if (postcopy_start(s, &local_err)) {
3476                migrate_set_error(s, local_err);
3477                error_report_err(local_err);
3478            }
3479            return MIG_ITERATE_SKIP;
3480        }
3481
3482        /*
3483         * For precopy, migration can complete only if:
3484         *
3485         * (1) Switchover is acknowledged by destination
3486         * (2) Pending size is no more than the threshold specified
3487         *     (which was calculated from expected downtime)
3488         */
3489        complete_ready = can_switchover && (pending_size <= s->threshold_size);
3490    }
3491
3492    if (complete_ready) {
3493        trace_migration_thread_low_pending(pending_size);
3494        migration_completion(s);
3495        return MIG_ITERATE_BREAK;
3496    }
3497
3498    /* Just another iteration step */
3499    qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
3500    return MIG_ITERATE_RESUME;
3501}
3502
3503static void migration_iteration_finish(MigrationState *s)
3504{
3505    bql_lock();
3506
3507    /*
3508     * If we enabled cpu throttling for auto-converge, turn it off.
3509     * Stopping CPU throttle should be serialized by BQL to avoid
3510     * racing for the throttle_dirty_sync_timer.
3511     */
3512    if (migrate_auto_converge()) {
3513        cpu_throttle_stop();
3514    }
3515
3516    switch (s->state) {
3517    case MIGRATION_STATUS_COMPLETED:
3518        runstate_set(RUN_STATE_POSTMIGRATE);
3519        break;
3520    case MIGRATION_STATUS_COLO:
3521        assert(migrate_colo());
3522        migrate_start_colo_process(s);
3523        s->vm_old_state = RUN_STATE_RUNNING;
3524        /* Fallthrough */
3525    case MIGRATION_STATUS_FAILED:
3526    case MIGRATION_STATUS_CANCELLED:
3527    case MIGRATION_STATUS_CANCELLING:
3528        /*
3529         * Re-activate the block drives if they're inactivated.  Note, COLO
3530         * shouldn't use block_active at all, so it should be no-op there.
3531         */
3532        migration_block_activate(NULL);
3533        if (runstate_is_live(s->vm_old_state)) {
3534            if (!runstate_check(RUN_STATE_SHUTDOWN)) {
3535                vm_start();
3536            }
3537        } else {
3538            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
3539                runstate_set(s->vm_old_state);
3540            }
3541        }
3542        break;
3543
3544    default:
3545        /* Should not reach here, but if so, forgive the VM. */
3546        error_report("%s: Unknown ending state %d", __func__, s->state);
3547        break;
3548    }
3549
3550    migration_bh_schedule(migration_cleanup_bh, s);
3551    bql_unlock();
3552}
3553
3554static void bg_migration_iteration_finish(MigrationState *s)
3555{
3556    /*
3557     * Stop tracking RAM writes - un-protect memory, un-register UFFD
3558     * memory ranges, flush kernel wait queues and wake up threads
3559     * waiting for write fault to be resolved.
3560     */
3561    ram_write_tracking_stop();
3562
3563    bql_lock();
3564    switch (s->state) {
3565    case MIGRATION_STATUS_COMPLETED:
3566    case MIGRATION_STATUS_ACTIVE:
3567    case MIGRATION_STATUS_FAILED:
3568    case MIGRATION_STATUS_CANCELLED:
3569    case MIGRATION_STATUS_CANCELLING:
3570        break;
3571
3572    default:
3573        /* Should not reach here, but if so, forgive the VM. */
3574        error_report("%s: Unknown ending state %d", __func__, s->state);
3575        break;
3576    }
3577
3578    migration_bh_schedule(migration_cleanup_bh, s);
3579    bql_unlock();
3580}
3581
3582/*
3583 * Return true if continue to the next iteration directly, false
3584 * otherwise.
3585 */
3586static MigIterateState bg_migration_iteration_run(MigrationState *s)
3587{
3588    int res;
3589
3590    res = qemu_savevm_state_iterate(s->to_dst_file, false);
3591    if (res > 0) {
3592        bg_migration_completion(s);
3593        return MIG_ITERATE_BREAK;
3594    }
3595
3596    return MIG_ITERATE_RESUME;
3597}
3598
3599void migration_make_urgent_request(void)
3600{
3601    qemu_sem_post(&migrate_get_current()->rate_limit_sem);
3602}
3603
3604void migration_consume_urgent_request(void)
3605{
3606    qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
3607}
3608
3609/* Returns true if the rate limiting was broken by an urgent request */
3610bool migration_rate_limit(void)
3611{
3612    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3613    MigrationState *s = migrate_get_current();
3614
3615    bool urgent = false;
3616    migration_update_counters(s, now);
3617    if (migration_rate_exceeded(s->to_dst_file)) {
3618
3619        if (qemu_file_get_error(s->to_dst_file)) {
3620            return false;
3621        }
3622        /*
3623         * Wait for a delay to do rate limiting OR
3624         * something urgent to post the semaphore.
3625         */
3626        int ms = s->iteration_start_time + BUFFER_DELAY - now;
3627        trace_migration_rate_limit_pre(ms);
3628        if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
3629            /*
3630             * We were woken by one or more urgent things but
3631             * the timedwait will have consumed one of them.
3632             * The service routine for the urgent wake will dec
3633             * the semaphore itself for each item it consumes,
3634             * so add this one we just eat back.
3635             */
3636            qemu_sem_post(&s->rate_limit_sem);
3637            urgent = true;
3638        }
3639        trace_migration_rate_limit_post(urgent);
3640    }
3641    return urgent;
3642}
3643
3644/*
3645 * if failover devices are present, wait they are completely
3646 * unplugged
3647 */
3648
3649static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
3650                                    int new_state)
3651{
3652    if (qemu_savevm_state_guest_unplug_pending()) {
3653        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);
3654
3655        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
3656               qemu_savevm_state_guest_unplug_pending()) {
3657            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
3658        }
3659        if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
3660            int timeout = 120; /* 30 seconds */
3661            /*
3662             * migration has been canceled
3663             * but as we have started an unplug we must wait the end
3664             * to be able to plug back the card
3665             */
3666            while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
3667                qemu_sem_timedwait(&s->wait_unplug_sem, 250);
3668            }
3669            if (qemu_savevm_state_guest_unplug_pending() &&
3670                !qtest_enabled()) {
3671                warn_report("migration: partially unplugged device on "
3672                            "failure");
3673            }
3674        }
3675
3676        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
3677    } else {
3678        migrate_set_state(&s->state, old_state, new_state);
3679    }
3680}
3681
3682/*
3683 * Master migration thread on the source VM.
3684 * It drives the migration and pumps the data down the outgoing channel.
3685 */
3686static void *migration_thread(void *opaque)
3687{
3688    MigrationState *s = opaque;
3689    MigrationThread *thread = NULL;
3690    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
3691    MigThrError thr_error;
3692    bool urgent = false;
3693    Error *local_err = NULL;
3694    int ret;
3695
3696    thread = migration_threads_add(MIGRATION_THREAD_SRC_MAIN,
3697                                   qemu_get_thread_id());
3698
3699    rcu_register_thread();
3700
3701    update_iteration_initial_status(s);
3702
3703    if (!multifd_send_setup()) {
3704        goto out;
3705    }
3706
3707    bql_lock();
3708    qemu_savevm_state_header(s->to_dst_file);
3709    bql_unlock();
3710
3711    /*
3712     * If we opened the return path, we need to make sure dst has it
3713     * opened as well.
3714     */
3715    if (s->rp_state.rp_thread_created) {
3716        /* Now tell the dest that it should open its end so it can reply */
3717        qemu_savevm_send_open_return_path(s->to_dst_file);
3718
3719        /* And do a ping that will make stuff easier to debug */
3720        qemu_savevm_send_ping(s->to_dst_file, 1);
3721    }
3722
3723    if (migrate_postcopy()) {
3724        /*
3725         * Tell the destination that we *might* want to do postcopy later;
3726         * if the other end can't do postcopy it should fail now, nice and
3727         * early.
3728         */
3729        qemu_savevm_send_postcopy_advise(s->to_dst_file);
3730    }
3731
3732    if (migrate_colo()) {
3733        /* Notify migration destination that we enable COLO */
3734        qemu_savevm_send_colo_enable(s->to_dst_file);
3735    }
3736
3737    if (migrate_auto_converge()) {
3738        /* Start RAMBlock dirty bitmap sync timer */
3739        cpu_throttle_dirty_sync_timer(true);
3740    }
3741
3742    bql_lock();
3743    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
3744    bql_unlock();
3745
3746    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
3747                               MIGRATION_STATUS_ACTIVE);
3748
3749    /*
3750     * Handle SETUP failures after waiting for virtio-net-failover
3751     * devices to unplug. This to preserve migration state transitions.
3752     */
3753    if (ret) {
3754        migrate_set_error(s, local_err);
3755        error_free(local_err);
3756        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3757                          MIGRATION_STATUS_FAILED);
3758        goto out;
3759    }
3760
3761    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
3762
3763    trace_migration_thread_setup_complete();
3764
3765    while (migration_is_active()) {
3766        if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
3767            MigIterateState iter_state = migration_iteration_run(s);
3768            if (iter_state == MIG_ITERATE_SKIP) {
3769                continue;
3770            } else if (iter_state == MIG_ITERATE_BREAK) {
3771                break;
3772            }
3773        }
3774
3775        /*
3776         * Try to detect any kind of failures, and see whether we
3777         * should stop the migration now.
3778         */
3779        thr_error = migration_detect_error(s);
3780        if (thr_error == MIG_THR_ERR_FATAL) {
3781            /* Stop migration */
3782            break;
3783        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
3784            /*
3785             * Just recovered from a e.g. network failure, reset all
3786             * the local variables. This is important to avoid
3787             * breaking transferred_bytes and bandwidth calculation
3788             */
3789            update_iteration_initial_status(s);
3790        }
3791
3792        urgent = migration_rate_limit();
3793    }
3794
3795out:
3796    trace_migration_thread_after_loop();
3797    migration_iteration_finish(s);
3798    object_unref(OBJECT(s));
3799    rcu_unregister_thread();
3800    migration_threads_remove(thread);
3801    return NULL;
3802}
3803
3804static void bg_migration_vm_start_bh(void *opaque)
3805{
3806    MigrationState *s = opaque;
3807
3808    vm_resume(s->vm_old_state);
3809    migration_downtime_end(s);
3810}
3811
3812/**
3813 * Background snapshot thread, based on live migration code.
3814 * This is an alternative implementation of live migration mechanism
3815 * introduced specifically to support background snapshots.
3816 *
3817 * It takes advantage of userfault_fd write protection mechanism introduced
3818 * in v5.7 kernel. Compared to existing dirty page logging migration much
3819 * lesser stream traffic is produced resulting in smaller snapshot images,
3820 * simply cause of no page duplicates can get into the stream.
3821 *
3822 * Another key point is that generated vmstate stream reflects machine state
3823 * 'frozen' at the beginning of snapshot creation compared to dirty page logging
3824 * mechanism, which effectively results in that saved snapshot is the state of VM
3825 * at the end of the process.
3826 */
3827static void *bg_migration_thread(void *opaque)
3828{
3829    MigrationState *s = opaque;
3830    int64_t setup_start;
3831    MigThrError thr_error;
3832    QEMUFile *fb;
3833    bool early_fail = true;
3834    Error *local_err = NULL;
3835    int ret;
3836
3837    rcu_register_thread();
3838
3839    migration_rate_set(RATE_LIMIT_DISABLED);
3840
3841    setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
3842    /*
3843     * We want to save vmstate for the moment when migration has been
3844     * initiated but also we want to save RAM content while VM is running.
3845     * The RAM content should appear first in the vmstate. So, we first
3846     * stash the non-RAM part of the vmstate to the temporary buffer,
3847     * then write RAM part of the vmstate to the migration stream
3848     * with vCPUs running and, finally, write stashed non-RAM part of
3849     * the vmstate from the buffer to the migration stream.
3850     */
3851    s->bioc = qio_channel_buffer_new(512 * 1024);
3852    qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
3853    fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
3854    object_unref(OBJECT(s->bioc));
3855
3856    update_iteration_initial_status(s);
3857
3858    /*
3859     * Prepare for tracking memory writes with UFFD-WP - populate
3860     * RAM pages before protecting.
3861     */
3862#ifdef __linux__
3863    ram_write_tracking_prepare();
3864#endif
3865
3866    bql_lock();
3867    qemu_savevm_state_header(s->to_dst_file);
3868    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
3869    bql_unlock();
3870
3871    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
3872                               MIGRATION_STATUS_ACTIVE);
3873
3874    /*
3875     * Handle SETUP failures after waiting for virtio-net-failover
3876     * devices to unplug. This to preserve migration state transitions.
3877     */
3878    if (ret) {
3879        migrate_set_error(s, local_err);
3880        error_free(local_err);
3881        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3882                          MIGRATION_STATUS_FAILED);
3883        goto fail_setup;
3884    }
3885
3886    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
3887
3888    trace_migration_thread_setup_complete();
3889
3890    bql_lock();
3891
3892    if (migration_stop_vm(s, RUN_STATE_PAUSED)) {
3893        goto fail;
3894    }
3895
3896    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false)) {
3897        goto fail;
3898    }
3899    /*
3900     * Since we are going to get non-iterable state data directly
3901     * from s->bioc->data, explicit flush is needed here.
3902     */
3903    qemu_fflush(fb);
3904
3905    /* Now initialize UFFD context and start tracking RAM writes */
3906    if (ram_write_tracking_start()) {
3907        goto fail;
3908    }
3909    early_fail = false;
3910
3911    /*
3912     * Start VM from BH handler to avoid write-fault lock here.
3913     * UFFD-WP protection for the whole RAM is already enabled so
3914     * calling VM state change notifiers from vm_start() would initiate
3915     * writes to virtio VQs memory which is in write-protected region.
3916     */
3917    migration_bh_schedule(bg_migration_vm_start_bh, s);
3918    bql_unlock();
3919
3920    while (migration_is_active()) {
3921        MigIterateState iter_state = bg_migration_iteration_run(s);
3922
3923        if (iter_state == MIG_ITERATE_BREAK) {
3924            break;
3925        }
3926
3927        /*
3928         * Try to detect any kind of failures, and see whether we
3929         * should stop the migration now.
3930         */
3931        thr_error = migration_detect_error(s);
3932        if (thr_error == MIG_THR_ERR_FATAL) {
3933            /* Stop migration */
3934            break;
3935        }
3936
3937        migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
3938    }
3939
3940    trace_migration_thread_after_loop();
3941
3942fail:
3943    if (early_fail) {
3944        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3945                MIGRATION_STATUS_FAILED);
3946        bql_unlock();
3947    }
3948
3949fail_setup:
3950    bg_migration_iteration_finish(s);
3951
3952    qemu_fclose(fb);
3953    object_unref(OBJECT(s));
3954    rcu_unregister_thread();
3955
3956    return NULL;
3957}
3958
3959void migration_connect(MigrationState *s, Error *error_in)
3960{
3961    Error *local_err = NULL;
3962    uint64_t rate_limit;
3963    bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
3964    int ret;
3965
3966    /*
3967     * If there's a previous error, free it and prepare for another one.
3968     * Meanwhile if migration completes successfully, there won't have an error
3969     * dumped when calling migration_cleanup().
3970     */
3971    migrate_error_free(s);
3972
3973    s->expected_downtime = migrate_downtime_limit();
3974    if (error_in) {
3975        migration_connect_set_error(s, error_in);
3976        if (resume) {
3977            /*
3978             * Don't do cleanup for resume if channel is invalid, but only dump
3979             * the error.  We wait for another channel connect from the user.
3980             * The error_report still gives HMP user a hint on what failed.
3981             * It's normally done in migration_cleanup(), but call it here
3982             * explicitly.
3983             */
3984            error_report_err(error_copy(s->error));
3985        } else {
3986            migration_cleanup(s);
3987        }
3988        return;
3989    }
3990
3991    if (resume) {
3992        /* This is a resumed migration */
3993        rate_limit = migrate_max_postcopy_bandwidth();
3994    } else {
3995        /* This is a fresh new migration */
3996        rate_limit = migrate_max_bandwidth();
3997
3998        /* Notify before starting migration thread */
3999        if (migration_call_notifiers(s, MIG_EVENT_PRECOPY_SETUP, &local_err)) {
4000            goto fail;
4001        }
4002    }
4003
4004    migration_rate_set(rate_limit);
4005    qemu_file_set_blocking(s->to_dst_file, true);
4006
4007    /*
4008     * Open the return path. For postcopy, it is used exclusively. For
4009     * precopy, only if user specified "return-path" capability would
4010     * QEMU uses the return path.
4011     */
4012    if (migrate_postcopy_ram() || migrate_return_path()) {
4013        if (open_return_path_on_source(s)) {
4014            error_setg(&local_err, "Unable to open return-path for postcopy");
4015            goto fail;
4016        }
4017    }
4018
4019    /*
4020     * This needs to be done before resuming a postcopy.  Note: for newer
4021     * QEMUs we will delay the channel creation until postcopy_start(), to
4022     * avoid disorder of channel creations.
4023     */
4024    if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
4025        postcopy_preempt_setup(s);
4026    }
4027
4028    if (resume) {
4029        /* Wakeup the main migration thread to do the recovery */
4030        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP,
4031                          MIGRATION_STATUS_POSTCOPY_RECOVER);
4032        qemu_sem_post(&s->postcopy_pause_sem);
4033        return;
4034    }
4035
4036    if (migrate_mode_is_cpr(s)) {
4037        ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
4038        if (ret < 0) {
4039            error_setg(&local_err, "migration_stop_vm failed, error %d", -ret);
4040            goto fail;
4041        }
4042    }
4043
4044    /*
4045     * Take a refcount to make sure the migration object won't get freed by
4046     * the main thread already in migration_shutdown().
4047     *
4048     * The refcount will be released at the end of the thread function.
4049     */
4050    object_ref(OBJECT(s));
4051
4052    if (migrate_background_snapshot()) {
4053        qemu_thread_create(&s->thread, MIGRATION_THREAD_SNAPSHOT,
4054                bg_migration_thread, s, QEMU_THREAD_JOINABLE);
4055    } else {
4056        qemu_thread_create(&s->thread, MIGRATION_THREAD_SRC_MAIN,
4057                migration_thread, s, QEMU_THREAD_JOINABLE);
4058    }
4059    s->migration_thread_running = true;
4060    return;
4061
4062fail:
4063    migrate_set_error(s, local_err);
4064    if (s->state != MIGRATION_STATUS_CANCELLING) {
4065        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
4066    }
4067    error_report_err(local_err);
4068    migration_cleanup(s);
4069}
4070
4071static void migration_class_init(ObjectClass *klass, const void *data)
4072{
4073    DeviceClass *dc = DEVICE_CLASS(klass);
4074
4075    dc->user_creatable = false;
4076    device_class_set_props_n(dc, migration_properties,
4077                             migration_properties_count);
4078}
4079
4080static void migration_instance_finalize(Object *obj)
4081{
4082    MigrationState *ms = MIGRATION_OBJ(obj);
4083
4084    qemu_mutex_destroy(&ms->error_mutex);
4085    qemu_mutex_destroy(&ms->qemu_file_lock);
4086    qemu_sem_destroy(&ms->wait_unplug_sem);
4087    qemu_sem_destroy(&ms->rate_limit_sem);
4088    qemu_event_destroy(&ms->pause_event);
4089    qemu_sem_destroy(&ms->postcopy_pause_sem);
4090    qemu_sem_destroy(&ms->rp_state.rp_sem);
4091    qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
4092    qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
4093    error_free(ms->error);
4094}
4095
4096static void migration_instance_init(Object *obj)
4097{
4098    MigrationState *ms = MIGRATION_OBJ(obj);
4099
4100    ms->state = MIGRATION_STATUS_NONE;
4101    ms->mbps = -1;
4102    ms->pages_per_second = -1;
4103    qemu_event_init(&ms->pause_event, false);
4104    qemu_mutex_init(&ms->error_mutex);
4105
4106    migrate_params_init(&ms->parameters);
4107
4108    qemu_sem_init(&ms->postcopy_pause_sem, 0);
4109    qemu_sem_init(&ms->rp_state.rp_sem, 0);
4110    qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
4111    qemu_sem_init(&ms->rate_limit_sem, 0);
4112    qemu_sem_init(&ms->wait_unplug_sem, 0);
4113    qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
4114    qemu_mutex_init(&ms->qemu_file_lock);
4115}
4116
4117/*
4118 * Return true if check pass, false otherwise. Error will be put
4119 * inside errp if provided.
4120 */
4121static bool migration_object_check(MigrationState *ms, Error **errp)
4122{
4123    /* Assuming all off */
4124    bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 };
4125
4126    if (!migrate_params_check(&ms->parameters, errp)) {
4127        return false;
4128    }
4129
4130    return migrate_caps_check(old_caps, ms->capabilities, errp);
4131}
4132
4133static const TypeInfo migration_type = {
4134    .name = TYPE_MIGRATION,
4135    /*
4136     * NOTE: TYPE_MIGRATION is not really a device, as the object is
4137     * not created using qdev_new(), it is not attached to the qdev
4138     * device tree, and it is never realized.
4139     *
4140     * TODO: Make this TYPE_OBJECT once QOM provides something like
4141     * TYPE_DEVICE's "-global" properties.
4142     */
4143    .parent = TYPE_DEVICE,
4144    .class_init = migration_class_init,
4145    .class_size = sizeof(MigrationClass),
4146    .instance_size = sizeof(MigrationState),
4147    .instance_init = migration_instance_init,
4148    .instance_finalize = migration_instance_finalize,
4149};
4150
4151static void register_migration_types(void)
4152{
4153    type_register_static(&migration_type);
4154}
4155
4156type_init(register_migration_types);
4157