qemu/migration/migration.c
<<
>>
Prefs
   1/*
   2 * QEMU live migration
   3 *
   4 * Copyright IBM, Corp. 2008
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 * Contributions after 2012-01-13 are licensed under the terms of the
  13 * GNU GPL, version 2 or (at your option) any later version.
  14 */
  15
  16#include "qemu/osdep.h"
  17#include "qemu/cutils.h"
  18#include "qemu/error-report.h"
  19#include "migration/blocker.h"
  20#include "exec.h"
  21#include "fd.h"
  22#include "socket.h"
  23#include "rdma.h"
  24#include "ram.h"
  25#include "migration/global_state.h"
  26#include "migration/misc.h"
  27#include "migration.h"
  28#include "savevm.h"
  29#include "qemu-file-channel.h"
  30#include "qemu-file.h"
  31#include "migration/vmstate.h"
  32#include "block/block.h"
  33#include "qapi/error.h"
  34#include "qapi/qapi-commands-migration.h"
  35#include "qapi/qapi-events-migration.h"
  36#include "qapi/qmp/qerror.h"
  37#include "qapi/qmp/qnull.h"
  38#include "qemu/rcu.h"
  39#include "block.h"
  40#include "postcopy-ram.h"
  41#include "qemu/thread.h"
  42#include "trace.h"
  43#include "exec/target_page.h"
  44#include "io/channel-buffer.h"
  45#include "migration/colo.h"
  46#include "hw/boards.h"
  47#include "monitor/monitor.h"
  48
  49#define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
  50
  51/* Amount of time to allocate to each "chunk" of bandwidth-throttled
  52 * data. */
  53#define BUFFER_DELAY     100
  54#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
  55
  56/* Time in milliseconds we are allowed to stop the source,
  57 * for sending the last part */
  58#define DEFAULT_MIGRATE_SET_DOWNTIME 300
  59
  60/* Maximum migrate downtime set to 2000 seconds */
  61#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
  62#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
  63
  64/* Default compression thread count */
  65#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
  66/* Default decompression thread count, usually decompression is at
  67 * least 4 times as fast as compression.*/
  68#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
  69/*0: means nocompress, 1: best speed, ... 9: best compress ratio */
  70#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
  71/* Define default autoconverge cpu throttle migration parameters */
  72#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
  73#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
  74
  75/* Migration XBZRLE default cache size */
  76#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)
  77
  78/* The delay time (in ms) between two COLO checkpoints
  79 * Note: Please change this default value to 10000 when we support hybrid mode.
  80 */
  81#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
  82#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
  83#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
  84
  85static NotifierList migration_state_notifiers =
  86    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
  87
  88static bool deferred_incoming;
  89
  90/* Messages sent on the return path from destination to source */
  91enum mig_rp_message_type {
  92    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
  93    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
  94    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32 ) */
  95
  96    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
  97    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
  98
  99    MIG_RP_MSG_MAX
 100};
 101
 102/* When we add fault tolerance, we could have several
 103   migrations at once.  For now we don't need to add
 104   dynamic creation of migration */
 105
 106static MigrationState *current_migration;
 107
 108static bool migration_object_check(MigrationState *ms, Error **errp);
 109static int migration_maybe_pause(MigrationState *s,
 110                                 int *current_active_state,
 111                                 int new_state);
 112
 113void migration_object_init(void)
 114{
 115    MachineState *ms = MACHINE(qdev_get_machine());
 116    Error *err = NULL;
 117
 118    /* This can only be called once. */
 119    assert(!current_migration);
 120    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
 121
 122    if (!migration_object_check(current_migration, &err)) {
 123        error_report_err(err);
 124        exit(1);
 125    }
 126
 127    /*
 128     * We cannot really do this in migration_instance_init() since at
 129     * that time global properties are not yet applied, then this
 130     * value will be definitely replaced by something else.
 131     */
 132    if (ms->enforce_config_section) {
 133        current_migration->send_configuration = true;
 134    }
 135}
 136
 137void migration_object_finalize(void)
 138{
 139    object_unref(OBJECT(current_migration));
 140}
 141
 142/* For outgoing */
 143MigrationState *migrate_get_current(void)
 144{
 145    /* This can only be called after the object created. */
 146    assert(current_migration);
 147    return current_migration;
 148}
 149
 150MigrationIncomingState *migration_incoming_get_current(void)
 151{
 152    static bool once;
 153    static MigrationIncomingState mis_current;
 154
 155    if (!once) {
 156        mis_current.state = MIGRATION_STATUS_NONE;
 157        memset(&mis_current, 0, sizeof(MigrationIncomingState));
 158        mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE,
 159                                                   sizeof(struct PostCopyFD));
 160        qemu_mutex_init(&mis_current.rp_mutex);
 161        qemu_event_init(&mis_current.main_thread_load_event, false);
 162
 163        init_dirty_bitmap_incoming_migration();
 164
 165        once = true;
 166    }
 167    return &mis_current;
 168}
 169
 170void migration_incoming_state_destroy(void)
 171{
 172    struct MigrationIncomingState *mis = migration_incoming_get_current();
 173
 174    if (mis->to_src_file) {
 175        /* Tell source that we are done */
 176        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
 177        qemu_fclose(mis->to_src_file);
 178        mis->to_src_file = NULL;
 179    }
 180
 181    if (mis->from_src_file) {
 182        qemu_fclose(mis->from_src_file);
 183        mis->from_src_file = NULL;
 184    }
 185    if (mis->postcopy_remote_fds) {
 186        g_array_free(mis->postcopy_remote_fds, TRUE);
 187        mis->postcopy_remote_fds = NULL;
 188    }
 189
 190    qemu_event_reset(&mis->main_thread_load_event);
 191}
 192
 193static void migrate_generate_event(int new_state)
 194{
 195    if (migrate_use_events()) {
 196        qapi_event_send_migration(new_state, &error_abort);
 197    }
 198}
 199
 200/*
 201 * Called on -incoming with a defer: uri.
 202 * The migration can be started later after any parameters have been
 203 * changed.
 204 */
 205static void deferred_incoming_migration(Error **errp)
 206{
 207    if (deferred_incoming) {
 208        error_setg(errp, "Incoming migration already deferred");
 209    }
 210    deferred_incoming = true;
 211}
 212
 213/*
 214 * Send a message on the return channel back to the source
 215 * of the migration.
 216 */
 217static int migrate_send_rp_message(MigrationIncomingState *mis,
 218                                   enum mig_rp_message_type message_type,
 219                                   uint16_t len, void *data)
 220{
 221    int ret = 0;
 222
 223    trace_migrate_send_rp_message((int)message_type, len);
 224    qemu_mutex_lock(&mis->rp_mutex);
 225
 226    /*
 227     * It's possible that the file handle got lost due to network
 228     * failures.
 229     */
 230    if (!mis->to_src_file) {
 231        ret = -EIO;
 232        goto error;
 233    }
 234
 235    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
 236    qemu_put_be16(mis->to_src_file, len);
 237    qemu_put_buffer(mis->to_src_file, data, len);
 238    qemu_fflush(mis->to_src_file);
 239
 240    /* It's possible that qemu file got error during sending */
 241    ret = qemu_file_get_error(mis->to_src_file);
 242
 243error:
 244    qemu_mutex_unlock(&mis->rp_mutex);
 245    return ret;
 246}
 247
 248/* Request a range of pages from the source VM at the given
 249 * start address.
 250 *   rbname: Name of the RAMBlock to request the page in, if NULL it's the same
 251 *           as the last request (a name must have been given previously)
 252 *   Start: Address offset within the RB
 253 *   Len: Length in bytes required - must be a multiple of pagesize
 254 */
 255int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
 256                              ram_addr_t start, size_t len)
 257{
 258    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
 259    size_t msglen = 12; /* start + len */
 260    enum mig_rp_message_type msg_type;
 261
 262    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
 263    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
 264
 265    if (rbname) {
 266        int rbname_len = strlen(rbname);
 267        assert(rbname_len < 256);
 268
 269        bufc[msglen++] = rbname_len;
 270        memcpy(bufc + msglen, rbname, rbname_len);
 271        msglen += rbname_len;
 272        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
 273    } else {
 274        msg_type = MIG_RP_MSG_REQ_PAGES;
 275    }
 276
 277    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
 278}
 279
 280void qemu_start_incoming_migration(const char *uri, Error **errp)
 281{
 282    const char *p;
 283
 284    qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
 285    if (!strcmp(uri, "defer")) {
 286        deferred_incoming_migration(errp);
 287    } else if (strstart(uri, "tcp:", &p)) {
 288        tcp_start_incoming_migration(p, errp);
 289#ifdef CONFIG_RDMA
 290    } else if (strstart(uri, "rdma:", &p)) {
 291        rdma_start_incoming_migration(p, errp);
 292#endif
 293    } else if (strstart(uri, "exec:", &p)) {
 294        exec_start_incoming_migration(p, errp);
 295    } else if (strstart(uri, "unix:", &p)) {
 296        unix_start_incoming_migration(p, errp);
 297    } else if (strstart(uri, "fd:", &p)) {
 298        fd_start_incoming_migration(p, errp);
 299    } else {
 300        error_setg(errp, "unknown migration protocol: %s", uri);
 301    }
 302}
 303
 304static void process_incoming_migration_bh(void *opaque)
 305{
 306    Error *local_err = NULL;
 307    MigrationIncomingState *mis = opaque;
 308
 309    /* Make sure all file formats flush their mutable metadata.
 310     * If we get an error here, just don't restart the VM yet. */
 311    bdrv_invalidate_cache_all(&local_err);
 312    if (local_err) {
 313        error_report_err(local_err);
 314        local_err = NULL;
 315        autostart = false;
 316    }
 317
 318    /*
 319     * This must happen after all error conditions are dealt with and
 320     * we're sure the VM is going to be running on this host.
 321     */
 322    qemu_announce_self();
 323
 324    if (multifd_load_cleanup(&local_err) != 0) {
 325        error_report_err(local_err);
 326        autostart = false;
 327    }
 328    /* If global state section was not received or we are in running
 329       state, we need to obey autostart. Any other state is set with
 330       runstate_set. */
 331
 332    dirty_bitmap_mig_before_vm_start();
 333
 334    if (!global_state_received() ||
 335        global_state_get_runstate() == RUN_STATE_RUNNING) {
 336        if (autostart) {
 337            vm_start();
 338        } else {
 339            runstate_set(RUN_STATE_PAUSED);
 340        }
 341    } else {
 342        runstate_set(global_state_get_runstate());
 343    }
 344    /*
 345     * This must happen after any state changes since as soon as an external
 346     * observer sees this event they might start to prod at the VM assuming
 347     * it's ready to use.
 348     */
 349    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
 350                      MIGRATION_STATUS_COMPLETED);
 351    qemu_bh_delete(mis->bh);
 352    migration_incoming_state_destroy();
 353}
 354
 355static void process_incoming_migration_co(void *opaque)
 356{
 357    MigrationIncomingState *mis = migration_incoming_get_current();
 358    PostcopyState ps;
 359    int ret;
 360
 361    assert(mis->from_src_file);
 362    mis->largest_page_size = qemu_ram_pagesize_largest();
 363    postcopy_state_set(POSTCOPY_INCOMING_NONE);
 364    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
 365                      MIGRATION_STATUS_ACTIVE);
 366    ret = qemu_loadvm_state(mis->from_src_file);
 367
 368    ps = postcopy_state_get();
 369    trace_process_incoming_migration_co_end(ret, ps);
 370    if (ps != POSTCOPY_INCOMING_NONE) {
 371        if (ps == POSTCOPY_INCOMING_ADVISE) {
 372            /*
 373             * Where a migration had postcopy enabled (and thus went to advise)
 374             * but managed to complete within the precopy period, we can use
 375             * the normal exit.
 376             */
 377            postcopy_ram_incoming_cleanup(mis);
 378        } else if (ret >= 0) {
 379            /*
 380             * Postcopy was started, cleanup should happen at the end of the
 381             * postcopy thread.
 382             */
 383            trace_process_incoming_migration_co_postcopy_end_main();
 384            return;
 385        }
 386        /* Else if something went wrong then just fall out of the normal exit */
 387    }
 388
 389    /* we get COLO info, and know if we are in COLO mode */
 390    if (!ret && migration_incoming_enable_colo()) {
 391        mis->migration_incoming_co = qemu_coroutine_self();
 392        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
 393             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
 394        mis->have_colo_incoming_thread = true;
 395        qemu_coroutine_yield();
 396
 397        /* Wait checkpoint incoming thread exit before free resource */
 398        qemu_thread_join(&mis->colo_incoming_thread);
 399    }
 400
 401    if (ret < 0) {
 402        Error *local_err = NULL;
 403
 404        migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
 405                          MIGRATION_STATUS_FAILED);
 406        error_report("load of migration failed: %s", strerror(-ret));
 407        qemu_fclose(mis->from_src_file);
 408        if (multifd_load_cleanup(&local_err) != 0) {
 409            error_report_err(local_err);
 410        }
 411        exit(EXIT_FAILURE);
 412    }
 413    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
 414    qemu_bh_schedule(mis->bh);
 415}
 416
 417static void migration_incoming_setup(QEMUFile *f)
 418{
 419    MigrationIncomingState *mis = migration_incoming_get_current();
 420
 421    if (multifd_load_setup() != 0) {
 422        /* We haven't been able to create multifd threads
 423           nothing better to do */
 424        exit(EXIT_FAILURE);
 425    }
 426
 427    if (!mis->from_src_file) {
 428        mis->from_src_file = f;
 429    }
 430    qemu_file_set_blocking(f, false);
 431}
 432
 433static void migration_incoming_process(void)
 434{
 435    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
 436    qemu_coroutine_enter(co);
 437}
 438
 439void migration_fd_process_incoming(QEMUFile *f)
 440{
 441    migration_incoming_setup(f);
 442    migration_incoming_process();
 443}
 444
 445void migration_ioc_process_incoming(QIOChannel *ioc)
 446{
 447    MigrationIncomingState *mis = migration_incoming_get_current();
 448
 449    if (!mis->from_src_file) {
 450        QEMUFile *f = qemu_fopen_channel_input(ioc);
 451        migration_fd_process_incoming(f);
 452    }
 453    /* We still only have a single channel.  Nothing to do here yet */
 454}
 455
 456/**
 457 * @migration_has_all_channels: We have received all channels that we need
 458 *
 459 * Returns true when we have got connections to all the channels that
 460 * we need for migration.
 461 */
 462bool migration_has_all_channels(void)
 463{
 464    return true;
 465}
 466
 467/*
 468 * Send a 'SHUT' message on the return channel with the given value
 469 * to indicate that we've finished with the RP.  Non-0 value indicates
 470 * error.
 471 */
 472void migrate_send_rp_shut(MigrationIncomingState *mis,
 473                          uint32_t value)
 474{
 475    uint32_t buf;
 476
 477    buf = cpu_to_be32(value);
 478    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
 479}
 480
 481/*
 482 * Send a 'PONG' message on the return channel with the given value
 483 * (normally in response to a 'PING')
 484 */
 485void migrate_send_rp_pong(MigrationIncomingState *mis,
 486                          uint32_t value)
 487{
 488    uint32_t buf;
 489
 490    buf = cpu_to_be32(value);
 491    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
 492}
 493
 494MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
 495{
 496    MigrationCapabilityStatusList *head = NULL;
 497    MigrationCapabilityStatusList *caps;
 498    MigrationState *s = migrate_get_current();
 499    int i;
 500
 501    caps = NULL; /* silence compiler warning */
 502    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
 503#ifndef CONFIG_LIVE_BLOCK_MIGRATION
 504        if (i == MIGRATION_CAPABILITY_BLOCK) {
 505            continue;
 506        }
 507#endif
 508        if (head == NULL) {
 509            head = g_malloc0(sizeof(*caps));
 510            caps = head;
 511        } else {
 512            caps->next = g_malloc0(sizeof(*caps));
 513            caps = caps->next;
 514        }
 515        caps->value =
 516            g_malloc(sizeof(*caps->value));
 517        caps->value->capability = i;
 518        caps->value->state = s->enabled_capabilities[i];
 519    }
 520
 521    return head;
 522}
 523
 524MigrationParameters *qmp_query_migrate_parameters(Error **errp)
 525{
 526    MigrationParameters *params;
 527    MigrationState *s = migrate_get_current();
 528
 529    /* TODO use QAPI_CLONE() instead of duplicating it inline */
 530    params = g_malloc0(sizeof(*params));
 531    params->has_compress_level = true;
 532    params->compress_level = s->parameters.compress_level;
 533    params->has_compress_threads = true;
 534    params->compress_threads = s->parameters.compress_threads;
 535    params->has_decompress_threads = true;
 536    params->decompress_threads = s->parameters.decompress_threads;
 537    params->has_cpu_throttle_initial = true;
 538    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
 539    params->has_cpu_throttle_increment = true;
 540    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
 541    params->has_tls_creds = true;
 542    params->tls_creds = g_strdup(s->parameters.tls_creds);
 543    params->has_tls_hostname = true;
 544    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
 545    params->has_max_bandwidth = true;
 546    params->max_bandwidth = s->parameters.max_bandwidth;
 547    params->has_downtime_limit = true;
 548    params->downtime_limit = s->parameters.downtime_limit;
 549    params->has_x_checkpoint_delay = true;
 550    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
 551    params->has_block_incremental = true;
 552    params->block_incremental = s->parameters.block_incremental;
 553    params->has_x_multifd_channels = true;
 554    params->x_multifd_channels = s->parameters.x_multifd_channels;
 555    params->has_x_multifd_page_count = true;
 556    params->x_multifd_page_count = s->parameters.x_multifd_page_count;
 557    params->has_xbzrle_cache_size = true;
 558    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
 559
 560    return params;
 561}
 562
 563/*
 564 * Return true if we're already in the middle of a migration
 565 * (i.e. any of the active or setup states)
 566 */
 567static bool migration_is_setup_or_active(int state)
 568{
 569    switch (state) {
 570    case MIGRATION_STATUS_ACTIVE:
 571    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
 572    case MIGRATION_STATUS_SETUP:
 573    case MIGRATION_STATUS_PRE_SWITCHOVER:
 574    case MIGRATION_STATUS_DEVICE:
 575        return true;
 576
 577    default:
 578        return false;
 579
 580    }
 581}
 582
 583static void populate_ram_info(MigrationInfo *info, MigrationState *s)
 584{
 585    info->has_ram = true;
 586    info->ram = g_malloc0(sizeof(*info->ram));
 587    info->ram->transferred = ram_counters.transferred;
 588    info->ram->total = ram_bytes_total();
 589    info->ram->duplicate = ram_counters.duplicate;
 590    /* legacy value.  It is not used anymore */
 591    info->ram->skipped = 0;
 592    info->ram->normal = ram_counters.normal;
 593    info->ram->normal_bytes = ram_counters.normal *
 594        qemu_target_page_size();
 595    info->ram->mbps = s->mbps;
 596    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
 597    info->ram->postcopy_requests = ram_counters.postcopy_requests;
 598    info->ram->page_size = qemu_target_page_size();
 599
 600    if (migrate_use_xbzrle()) {
 601        info->has_xbzrle_cache = true;
 602        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
 603        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
 604        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
 605        info->xbzrle_cache->pages = xbzrle_counters.pages;
 606        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
 607        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
 608        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
 609    }
 610
 611    if (cpu_throttle_active()) {
 612        info->has_cpu_throttle_percentage = true;
 613        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
 614    }
 615
 616    if (s->state != MIGRATION_STATUS_COMPLETED) {
 617        info->ram->remaining = ram_bytes_remaining();
 618        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
 619    }
 620}
 621
 622static void populate_disk_info(MigrationInfo *info)
 623{
 624    if (blk_mig_active()) {
 625        info->has_disk = true;
 626        info->disk = g_malloc0(sizeof(*info->disk));
 627        info->disk->transferred = blk_mig_bytes_transferred();
 628        info->disk->remaining = blk_mig_bytes_remaining();
 629        info->disk->total = blk_mig_bytes_total();
 630    }
 631}
 632
 633MigrationInfo *qmp_query_migrate(Error **errp)
 634{
 635    MigrationInfo *info = g_malloc0(sizeof(*info));
 636    MigrationState *s = migrate_get_current();
 637
 638    switch (s->state) {
 639    case MIGRATION_STATUS_NONE:
 640        /* no migration has happened ever */
 641        break;
 642    case MIGRATION_STATUS_SETUP:
 643        info->has_status = true;
 644        info->has_total_time = false;
 645        break;
 646    case MIGRATION_STATUS_ACTIVE:
 647    case MIGRATION_STATUS_CANCELLING:
 648    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
 649    case MIGRATION_STATUS_PRE_SWITCHOVER:
 650    case MIGRATION_STATUS_DEVICE:
 651         /* TODO add some postcopy stats */
 652        info->has_status = true;
 653        info->has_total_time = true;
 654        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
 655            - s->start_time;
 656        info->has_expected_downtime = true;
 657        info->expected_downtime = s->expected_downtime;
 658        info->has_setup_time = true;
 659        info->setup_time = s->setup_time;
 660
 661        populate_ram_info(info, s);
 662        populate_disk_info(info);
 663        break;
 664    case MIGRATION_STATUS_COLO:
 665        info->has_status = true;
 666        /* TODO: display COLO specific information (checkpoint info etc.) */
 667        break;
 668    case MIGRATION_STATUS_COMPLETED:
 669        info->has_status = true;
 670        info->has_total_time = true;
 671        info->total_time = s->total_time;
 672        info->has_downtime = true;
 673        info->downtime = s->downtime;
 674        info->has_setup_time = true;
 675        info->setup_time = s->setup_time;
 676
 677        populate_ram_info(info, s);
 678        break;
 679    case MIGRATION_STATUS_FAILED:
 680        info->has_status = true;
 681        if (s->error) {
 682            info->has_error_desc = true;
 683            info->error_desc = g_strdup(error_get_pretty(s->error));
 684        }
 685        break;
 686    case MIGRATION_STATUS_CANCELLED:
 687        info->has_status = true;
 688        break;
 689    }
 690    info->status = s->state;
 691
 692    return info;
 693}
 694
 695/**
 696 * @migration_caps_check - check capability validity
 697 *
 698 * @cap_list: old capability list, array of bool
 699 * @params: new capabilities to be applied soon
 700 * @errp: set *errp if the check failed, with reason
 701 *
 702 * Returns true if check passed, otherwise false.
 703 */
 704static bool migrate_caps_check(bool *cap_list,
 705                               MigrationCapabilityStatusList *params,
 706                               Error **errp)
 707{
 708    MigrationCapabilityStatusList *cap;
 709    bool old_postcopy_cap;
 710    MigrationIncomingState *mis = migration_incoming_get_current();
 711
 712    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
 713
 714    for (cap = params; cap; cap = cap->next) {
 715        cap_list[cap->value->capability] = cap->value->state;
 716    }
 717
 718#ifndef CONFIG_LIVE_BLOCK_MIGRATION
 719    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
 720        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
 721                   "block migration");
 722        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
 723        return false;
 724    }
 725#endif
 726
 727    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
 728        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
 729            /* The decompression threads asynchronously write into RAM
 730             * rather than use the atomic copies needed to avoid
 731             * userfaulting.  It should be possible to fix the decompression
 732             * threads for compatibility in future.
 733             */
 734            error_setg(errp, "Postcopy is not currently compatible "
 735                       "with compression");
 736            return false;
 737        }
 738
 739        /* This check is reasonably expensive, so only when it's being
 740         * set the first time, also it's only the destination that needs
 741         * special support.
 742         */
 743        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
 744            !postcopy_ram_supported_by_host(mis)) {
 745            /* postcopy_ram_supported_by_host will have emitted a more
 746             * detailed message
 747             */
 748            error_setg(errp, "Postcopy is not supported");
 749            return false;
 750        }
 751    }
 752
 753    return true;
 754}
 755
 756void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 757                                  Error **errp)
 758{
 759    MigrationState *s = migrate_get_current();
 760    MigrationCapabilityStatusList *cap;
 761    bool cap_list[MIGRATION_CAPABILITY__MAX];
 762
 763    if (migration_is_setup_or_active(s->state)) {
 764        error_setg(errp, QERR_MIGRATION_ACTIVE);
 765        return;
 766    }
 767
 768    memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
 769    if (!migrate_caps_check(cap_list, params, errp)) {
 770        return;
 771    }
 772
 773    for (cap = params; cap; cap = cap->next) {
 774        s->enabled_capabilities[cap->value->capability] = cap->value->state;
 775    }
 776}
 777
 778/*
 779 * Check whether the parameters are valid. Error will be put into errp
 780 * (if provided). Return true if valid, otherwise false.
 781 */
 782static bool migrate_params_check(MigrationParameters *params, Error **errp)
 783{
 784    if (params->has_compress_level &&
 785        (params->compress_level > 9)) {
 786        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
 787                   "is invalid, it should be in the range of 0 to 9");
 788        return false;
 789    }
 790
 791    if (params->has_compress_threads && (params->compress_threads < 1)) {
 792        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
 793                   "compress_threads",
 794                   "is invalid, it should be in the range of 1 to 255");
 795        return false;
 796    }
 797
 798    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
 799        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
 800                   "decompress_threads",
 801                   "is invalid, it should be in the range of 1 to 255");
 802        return false;
 803    }
 804
 805    if (params->has_cpu_throttle_initial &&
 806        (params->cpu_throttle_initial < 1 ||
 807         params->cpu_throttle_initial > 99)) {
 808        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
 809                   "cpu_throttle_initial",
 810                   "an integer in the range of 1 to 99");
 811        return false;
 812    }
 813
 814    if (params->has_cpu_throttle_increment &&
 815        (params->cpu_throttle_increment < 1 ||
 816         params->cpu_throttle_increment > 99)) {
 817        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
 818                   "cpu_throttle_increment",
 819                   "an integer in the range of 1 to 99");
 820        return false;
 821    }
 822
 823    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
 824        error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
 825                         " range of 0 to %zu bytes/second", SIZE_MAX);
 826        return false;
 827    }
 828
 829    if (params->has_downtime_limit &&
 830        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
 831        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
 832                         "the range of 0 to %d milliseconds",
 833                         MAX_MIGRATE_DOWNTIME);
 834        return false;
 835    }
 836
 837    /* x_checkpoint_delay is now always positive */
 838
 839    if (params->has_x_multifd_channels && (params->x_multifd_channels < 1)) {
 840        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
 841                   "multifd_channels",
 842                   "is invalid, it should be in the range of 1 to 255");
 843        return false;
 844    }
 845    if (params->has_x_multifd_page_count &&
 846        (params->x_multifd_page_count < 1 ||
 847         params->x_multifd_page_count > 10000)) {
 848        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
 849                   "multifd_page_count",
 850                   "is invalid, it should be in the range of 1 to 10000");
 851        return false;
 852    }
 853
 854    if (params->has_xbzrle_cache_size &&
 855        (params->xbzrle_cache_size < qemu_target_page_size() ||
 856         !is_power_of_2(params->xbzrle_cache_size))) {
 857        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
 858                   "xbzrle_cache_size",
 859                   "is invalid, it should be bigger than target page size"
 860                   " and a power of two");
 861        return false;
 862    }
 863
 864    return true;
 865}
 866
 867static void migrate_params_test_apply(MigrateSetParameters *params,
 868                                      MigrationParameters *dest)
 869{
 870    *dest = migrate_get_current()->parameters;
 871
 872    /* TODO use QAPI_CLONE() instead of duplicating it inline */
 873
 874    if (params->has_compress_level) {
 875        dest->compress_level = params->compress_level;
 876    }
 877
 878    if (params->has_compress_threads) {
 879        dest->compress_threads = params->compress_threads;
 880    }
 881
 882    if (params->has_decompress_threads) {
 883        dest->decompress_threads = params->decompress_threads;
 884    }
 885
 886    if (params->has_cpu_throttle_initial) {
 887        dest->cpu_throttle_initial = params->cpu_throttle_initial;
 888    }
 889
 890    if (params->has_cpu_throttle_increment) {
 891        dest->cpu_throttle_increment = params->cpu_throttle_increment;
 892    }
 893
 894    if (params->has_tls_creds) {
 895        assert(params->tls_creds->type == QTYPE_QSTRING);
 896        dest->tls_creds = g_strdup(params->tls_creds->u.s);
 897    }
 898
 899    if (params->has_tls_hostname) {
 900        assert(params->tls_hostname->type == QTYPE_QSTRING);
 901        dest->tls_hostname = g_strdup(params->tls_hostname->u.s);
 902    }
 903
 904    if (params->has_max_bandwidth) {
 905        dest->max_bandwidth = params->max_bandwidth;
 906    }
 907
 908    if (params->has_downtime_limit) {
 909        dest->downtime_limit = params->downtime_limit;
 910    }
 911
 912    if (params->has_x_checkpoint_delay) {
 913        dest->x_checkpoint_delay = params->x_checkpoint_delay;
 914    }
 915
 916    if (params->has_block_incremental) {
 917        dest->block_incremental = params->block_incremental;
 918    }
 919    if (params->has_x_multifd_channels) {
 920        dest->x_multifd_channels = params->x_multifd_channels;
 921    }
 922    if (params->has_x_multifd_page_count) {
 923        dest->x_multifd_page_count = params->x_multifd_page_count;
 924    }
 925    if (params->has_xbzrle_cache_size) {
 926        dest->xbzrle_cache_size = params->xbzrle_cache_size;
 927    }
 928}
 929
 930static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
 931{
 932    MigrationState *s = migrate_get_current();
 933
 934    /* TODO use QAPI_CLONE() instead of duplicating it inline */
 935
 936    if (params->has_compress_level) {
 937        s->parameters.compress_level = params->compress_level;
 938    }
 939
 940    if (params->has_compress_threads) {
 941        s->parameters.compress_threads = params->compress_threads;
 942    }
 943
 944    if (params->has_decompress_threads) {
 945        s->parameters.decompress_threads = params->decompress_threads;
 946    }
 947
 948    if (params->has_cpu_throttle_initial) {
 949        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
 950    }
 951
 952    if (params->has_cpu_throttle_increment) {
 953        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
 954    }
 955
 956    if (params->has_tls_creds) {
 957        g_free(s->parameters.tls_creds);
 958        assert(params->tls_creds->type == QTYPE_QSTRING);
 959        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
 960    }
 961
 962    if (params->has_tls_hostname) {
 963        g_free(s->parameters.tls_hostname);
 964        assert(params->tls_hostname->type == QTYPE_QSTRING);
 965        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
 966    }
 967
 968    if (params->has_max_bandwidth) {
 969        s->parameters.max_bandwidth = params->max_bandwidth;
 970        if (s->to_dst_file) {
 971            qemu_file_set_rate_limit(s->to_dst_file,
 972                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
 973        }
 974    }
 975
 976    if (params->has_downtime_limit) {
 977        s->parameters.downtime_limit = params->downtime_limit;
 978    }
 979
 980    if (params->has_x_checkpoint_delay) {
 981        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
 982        if (migration_in_colo_state()) {
 983            colo_checkpoint_notify(s);
 984        }
 985    }
 986
 987    if (params->has_block_incremental) {
 988        s->parameters.block_incremental = params->block_incremental;
 989    }
 990    if (params->has_x_multifd_channels) {
 991        s->parameters.x_multifd_channels = params->x_multifd_channels;
 992    }
 993    if (params->has_x_multifd_page_count) {
 994        s->parameters.x_multifd_page_count = params->x_multifd_page_count;
 995    }
 996    if (params->has_xbzrle_cache_size) {
 997        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
 998        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
 999    }
1000}
1001
1002void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
1003{
1004    MigrationParameters tmp;
1005
1006    /* TODO Rewrite "" to null instead */
1007    if (params->has_tls_creds
1008        && params->tls_creds->type == QTYPE_QNULL) {
1009        QDECREF(params->tls_creds->u.n);
1010        params->tls_creds->type = QTYPE_QSTRING;
1011        params->tls_creds->u.s = strdup("");
1012    }
1013    /* TODO Rewrite "" to null instead */
1014    if (params->has_tls_hostname
1015        && params->tls_hostname->type == QTYPE_QNULL) {
1016        QDECREF(params->tls_hostname->u.n);
1017        params->tls_hostname->type = QTYPE_QSTRING;
1018        params->tls_hostname->u.s = strdup("");
1019    }
1020
1021    migrate_params_test_apply(params, &tmp);
1022
1023    if (!migrate_params_check(&tmp, errp)) {
1024        /* Invalid parameter */
1025        return;
1026    }
1027
1028    migrate_params_apply(params, errp);
1029}
1030
1031
1032void qmp_migrate_start_postcopy(Error **errp)
1033{
1034    MigrationState *s = migrate_get_current();
1035
1036    if (!migrate_postcopy()) {
1037        error_setg(errp, "Enable postcopy with migrate_set_capability before"
1038                         " the start of migration");
1039        return;
1040    }
1041
1042    if (s->state == MIGRATION_STATUS_NONE) {
1043        error_setg(errp, "Postcopy must be started after migration has been"
1044                         " started");
1045        return;
1046    }
1047    /*
1048     * we don't error if migration has finished since that would be racy
1049     * with issuing this command.
1050     */
1051    atomic_set(&s->start_postcopy, true);
1052}
1053
1054/* shared migration helpers */
1055
1056void migrate_set_state(int *state, int old_state, int new_state)
1057{
1058    assert(new_state < MIGRATION_STATUS__MAX);
1059    if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
1060        trace_migrate_set_state(MigrationStatus_str(new_state));
1061        migrate_generate_event(new_state);
1062    }
1063}
1064
1065static MigrationCapabilityStatusList *migrate_cap_add(
1066    MigrationCapabilityStatusList *list,
1067    MigrationCapability index,
1068    bool state)
1069{
1070    MigrationCapabilityStatusList *cap;
1071
1072    cap = g_new0(MigrationCapabilityStatusList, 1);
1073    cap->value = g_new0(MigrationCapabilityStatus, 1);
1074    cap->value->capability = index;
1075    cap->value->state = state;
1076    cap->next = list;
1077
1078    return cap;
1079}
1080
1081void migrate_set_block_enabled(bool value, Error **errp)
1082{
1083    MigrationCapabilityStatusList *cap;
1084
1085    cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value);
1086    qmp_migrate_set_capabilities(cap, errp);
1087    qapi_free_MigrationCapabilityStatusList(cap);
1088}
1089
1090static void migrate_set_block_incremental(MigrationState *s, bool value)
1091{
1092    s->parameters.block_incremental = value;
1093}
1094
1095static void block_cleanup_parameters(MigrationState *s)
1096{
1097    if (s->must_remove_block_options) {
1098        /* setting to false can never fail */
1099        migrate_set_block_enabled(false, &error_abort);
1100        migrate_set_block_incremental(s, false);
1101        s->must_remove_block_options = false;
1102    }
1103}
1104
1105static void migrate_fd_cleanup(void *opaque)
1106{
1107    MigrationState *s = opaque;
1108
1109    qemu_bh_delete(s->cleanup_bh);
1110    s->cleanup_bh = NULL;
1111
1112    qemu_savevm_state_cleanup();
1113
1114    if (s->to_dst_file) {
1115        Error *local_err = NULL;
1116
1117        trace_migrate_fd_cleanup();
1118        qemu_mutex_unlock_iothread();
1119        if (s->migration_thread_running) {
1120            qemu_thread_join(&s->thread);
1121            s->migration_thread_running = false;
1122        }
1123        qemu_mutex_lock_iothread();
1124
1125        if (multifd_save_cleanup(&local_err) != 0) {
1126            error_report_err(local_err);
1127        }
1128        qemu_fclose(s->to_dst_file);
1129        s->to_dst_file = NULL;
1130    }
1131
1132    assert((s->state != MIGRATION_STATUS_ACTIVE) &&
1133           (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
1134
1135    if (s->state == MIGRATION_STATUS_CANCELLING) {
1136        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
1137                          MIGRATION_STATUS_CANCELLED);
1138    }
1139
1140    if (s->error) {
1141        /* It is used on info migrate.  We can't free it */
1142        error_report_err(error_copy(s->error));
1143    }
1144    notifier_list_notify(&migration_state_notifiers, s);
1145    block_cleanup_parameters(s);
1146}
1147
1148void migrate_set_error(MigrationState *s, const Error *error)
1149{
1150    qemu_mutex_lock(&s->error_mutex);
1151    if (!s->error) {
1152        s->error = error_copy(error);
1153    }
1154    qemu_mutex_unlock(&s->error_mutex);
1155}
1156
1157void migrate_fd_error(MigrationState *s, const Error *error)
1158{
1159    trace_migrate_fd_error(error_get_pretty(error));
1160    assert(s->to_dst_file == NULL);
1161    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1162                      MIGRATION_STATUS_FAILED);
1163    migrate_set_error(s, error);
1164}
1165
1166static void migrate_fd_cancel(MigrationState *s)
1167{
1168    int old_state ;
1169    QEMUFile *f = migrate_get_current()->to_dst_file;
1170    trace_migrate_fd_cancel();
1171
1172    if (s->rp_state.from_dst_file) {
1173        /* shutdown the rp socket, so causing the rp thread to shutdown */
1174        qemu_file_shutdown(s->rp_state.from_dst_file);
1175    }
1176
1177    do {
1178        old_state = s->state;
1179        if (!migration_is_setup_or_active(old_state)) {
1180            break;
1181        }
1182        /* If the migration is paused, kick it out of the pause */
1183        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
1184            qemu_sem_post(&s->pause_sem);
1185        }
1186        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
1187    } while (s->state != MIGRATION_STATUS_CANCELLING);
1188
1189    /*
1190     * If we're unlucky the migration code might be stuck somewhere in a
1191     * send/write while the network has failed and is waiting to timeout;
1192     * if we've got shutdown(2) available then we can force it to quit.
1193     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
1194     * called in a bh, so there is no race against this cancel.
1195     */
1196    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
1197        qemu_file_shutdown(f);
1198    }
1199    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
1200        Error *local_err = NULL;
1201
1202        bdrv_invalidate_cache_all(&local_err);
1203        if (local_err) {
1204            error_report_err(local_err);
1205        } else {
1206            s->block_inactive = false;
1207        }
1208    }
1209}
1210
1211void add_migration_state_change_notifier(Notifier *notify)
1212{
1213    notifier_list_add(&migration_state_notifiers, notify);
1214}
1215
1216void remove_migration_state_change_notifier(Notifier *notify)
1217{
1218    notifier_remove(notify);
1219}
1220
1221bool migration_in_setup(MigrationState *s)
1222{
1223    return s->state == MIGRATION_STATUS_SETUP;
1224}
1225
1226bool migration_has_finished(MigrationState *s)
1227{
1228    return s->state == MIGRATION_STATUS_COMPLETED;
1229}
1230
1231bool migration_has_failed(MigrationState *s)
1232{
1233    return (s->state == MIGRATION_STATUS_CANCELLED ||
1234            s->state == MIGRATION_STATUS_FAILED);
1235}
1236
1237bool migration_in_postcopy(void)
1238{
1239    MigrationState *s = migrate_get_current();
1240
1241    return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
1242}
1243
1244bool migration_in_postcopy_after_devices(MigrationState *s)
1245{
1246    return migration_in_postcopy() && s->postcopy_after_devices;
1247}
1248
1249bool migration_is_idle(void)
1250{
1251    MigrationState *s = migrate_get_current();
1252
1253    switch (s->state) {
1254    case MIGRATION_STATUS_NONE:
1255    case MIGRATION_STATUS_CANCELLED:
1256    case MIGRATION_STATUS_COMPLETED:
1257    case MIGRATION_STATUS_FAILED:
1258        return true;
1259    case MIGRATION_STATUS_SETUP:
1260    case MIGRATION_STATUS_CANCELLING:
1261    case MIGRATION_STATUS_ACTIVE:
1262    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1263    case MIGRATION_STATUS_COLO:
1264    case MIGRATION_STATUS_PRE_SWITCHOVER:
1265    case MIGRATION_STATUS_DEVICE:
1266        return false;
1267    case MIGRATION_STATUS__MAX:
1268        g_assert_not_reached();
1269    }
1270
1271    return false;
1272}
1273
1274void migrate_init(MigrationState *s)
1275{
1276    /*
1277     * Reinitialise all migration state, except
1278     * parameters/capabilities that the user set, and
1279     * locks.
1280     */
1281    s->bytes_xfer = 0;
1282    s->xfer_limit = 0;
1283    s->cleanup_bh = 0;
1284    s->to_dst_file = NULL;
1285    s->state = MIGRATION_STATUS_NONE;
1286    s->rp_state.from_dst_file = NULL;
1287    s->rp_state.error = false;
1288    s->mbps = 0.0;
1289    s->downtime = 0;
1290    s->expected_downtime = 0;
1291    s->setup_time = 0;
1292    s->start_postcopy = false;
1293    s->postcopy_after_devices = false;
1294    s->migration_thread_running = false;
1295    error_free(s->error);
1296    s->error = NULL;
1297
1298    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
1299
1300    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1301    s->total_time = 0;
1302    s->vm_was_running = false;
1303    s->iteration_initial_bytes = 0;
1304    s->threshold_size = 0;
1305}
1306
1307static GSList *migration_blockers;
1308
1309int migrate_add_blocker(Error *reason, Error **errp)
1310{
1311    if (migrate_get_current()->only_migratable) {
1312        error_propagate(errp, error_copy(reason));
1313        error_prepend(errp, "disallowing migration blocker "
1314                          "(--only_migratable) for: ");
1315        return -EACCES;
1316    }
1317
1318    if (migration_is_idle()) {
1319        migration_blockers = g_slist_prepend(migration_blockers, reason);
1320        return 0;
1321    }
1322
1323    error_propagate(errp, error_copy(reason));
1324    error_prepend(errp, "disallowing migration blocker (migration in "
1325                      "progress) for: ");
1326    return -EBUSY;
1327}
1328
1329void migrate_del_blocker(Error *reason)
1330{
1331    migration_blockers = g_slist_remove(migration_blockers, reason);
1332}
1333
1334void qmp_migrate_incoming(const char *uri, Error **errp)
1335{
1336    Error *local_err = NULL;
1337    static bool once = true;
1338
1339    if (!deferred_incoming) {
1340        error_setg(errp, "For use with '-incoming defer'");
1341        return;
1342    }
1343    if (!once) {
1344        error_setg(errp, "The incoming migration has already been started");
1345    }
1346
1347    qemu_start_incoming_migration(uri, &local_err);
1348
1349    if (local_err) {
1350        error_propagate(errp, local_err);
1351        return;
1352    }
1353
1354    once = false;
1355}
1356
1357bool migration_is_blocked(Error **errp)
1358{
1359    if (qemu_savevm_state_blocked(errp)) {
1360        return true;
1361    }
1362
1363    if (migration_blockers) {
1364        error_propagate(errp, error_copy(migration_blockers->data));
1365        return true;
1366    }
1367
1368    return false;
1369}
1370
1371void qmp_migrate(const char *uri, bool has_blk, bool blk,
1372                 bool has_inc, bool inc, bool has_detach, bool detach,
1373                 Error **errp)
1374{
1375    Error *local_err = NULL;
1376    MigrationState *s = migrate_get_current();
1377    const char *p;
1378
1379    if (migration_is_setup_or_active(s->state) ||
1380        s->state == MIGRATION_STATUS_CANCELLING ||
1381        s->state == MIGRATION_STATUS_COLO) {
1382        error_setg(errp, QERR_MIGRATION_ACTIVE);
1383        return;
1384    }
1385    if (runstate_check(RUN_STATE_INMIGRATE)) {
1386        error_setg(errp, "Guest is waiting for an incoming migration");
1387        return;
1388    }
1389
1390    if (migration_is_blocked(errp)) {
1391        return;
1392    }
1393
1394    if ((has_blk && blk) || (has_inc && inc)) {
1395        if (migrate_use_block() || migrate_use_block_incremental()) {
1396            error_setg(errp, "Command options are incompatible with "
1397                       "current migration capabilities");
1398            return;
1399        }
1400        migrate_set_block_enabled(true, &local_err);
1401        if (local_err) {
1402            error_propagate(errp, local_err);
1403            return;
1404        }
1405        s->must_remove_block_options = true;
1406    }
1407
1408    if (has_inc && inc) {
1409        migrate_set_block_incremental(s, true);
1410    }
1411
1412    migrate_init(s);
1413
1414    if (strstart(uri, "tcp:", &p)) {
1415        tcp_start_outgoing_migration(s, p, &local_err);
1416#ifdef CONFIG_RDMA
1417    } else if (strstart(uri, "rdma:", &p)) {
1418        rdma_start_outgoing_migration(s, p, &local_err);
1419#endif
1420    } else if (strstart(uri, "exec:", &p)) {
1421        exec_start_outgoing_migration(s, p, &local_err);
1422    } else if (strstart(uri, "unix:", &p)) {
1423        unix_start_outgoing_migration(s, p, &local_err);
1424    } else if (strstart(uri, "fd:", &p)) {
1425        fd_start_outgoing_migration(s, p, &local_err);
1426    } else {
1427        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
1428                   "a valid migration protocol");
1429        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1430                          MIGRATION_STATUS_FAILED);
1431        block_cleanup_parameters(s);
1432        return;
1433    }
1434
1435    if (local_err) {
1436        migrate_fd_error(s, local_err);
1437        error_propagate(errp, local_err);
1438        return;
1439    }
1440}
1441
1442void qmp_migrate_cancel(Error **errp)
1443{
1444    migrate_fd_cancel(migrate_get_current());
1445}
1446
1447void qmp_migrate_continue(MigrationStatus state, Error **errp)
1448{
1449    MigrationState *s = migrate_get_current();
1450    if (s->state != state) {
1451        error_setg(errp,  "Migration not in expected state: %s",
1452                   MigrationStatus_str(s->state));
1453        return;
1454    }
1455    qemu_sem_post(&s->pause_sem);
1456}
1457
1458void qmp_migrate_set_cache_size(int64_t value, Error **errp)
1459{
1460    MigrateSetParameters p = {
1461        .has_xbzrle_cache_size = true,
1462        .xbzrle_cache_size = value,
1463    };
1464
1465    qmp_migrate_set_parameters(&p, errp);
1466}
1467
1468int64_t qmp_query_migrate_cache_size(Error **errp)
1469{
1470    return migrate_xbzrle_cache_size();
1471}
1472
1473void qmp_migrate_set_speed(int64_t value, Error **errp)
1474{
1475    MigrateSetParameters p = {
1476        .has_max_bandwidth = true,
1477        .max_bandwidth = value,
1478    };
1479
1480    qmp_migrate_set_parameters(&p, errp);
1481}
1482
1483void qmp_migrate_set_downtime(double value, Error **errp)
1484{
1485    if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
1486        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
1487                         "the range of 0 to %d seconds",
1488                         MAX_MIGRATE_DOWNTIME_SECONDS);
1489        return;
1490    }
1491
1492    value *= 1000; /* Convert to milliseconds */
1493    value = MAX(0, MIN(INT64_MAX, value));
1494
1495    MigrateSetParameters p = {
1496        .has_downtime_limit = true,
1497        .downtime_limit = value,
1498    };
1499
1500    qmp_migrate_set_parameters(&p, errp);
1501}
1502
1503bool migrate_release_ram(void)
1504{
1505    MigrationState *s;
1506
1507    s = migrate_get_current();
1508
1509    return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
1510}
1511
1512bool migrate_postcopy_ram(void)
1513{
1514    MigrationState *s;
1515
1516    s = migrate_get_current();
1517
1518    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
1519}
1520
/*
 * Return true if postcopy is in use in any form: either postcopy RAM or
 * dirty bitmaps migration is enabled.
 */
bool migrate_postcopy(void)
{
    if (migrate_postcopy_ram()) {
        return true;
    }
    return migrate_dirty_bitmaps();
}
1525
1526bool migrate_auto_converge(void)
1527{
1528    MigrationState *s;
1529
1530    s = migrate_get_current();
1531
1532    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
1533}
1534
1535bool migrate_zero_blocks(void)
1536{
1537    MigrationState *s;
1538
1539    s = migrate_get_current();
1540
1541    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
1542}
1543
1544bool migrate_use_compression(void)
1545{
1546    MigrationState *s;
1547
1548    s = migrate_get_current();
1549
1550    return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
1551}
1552
1553int migrate_compress_level(void)
1554{
1555    MigrationState *s;
1556
1557    s = migrate_get_current();
1558
1559    return s->parameters.compress_level;
1560}
1561
1562int migrate_compress_threads(void)
1563{
1564    MigrationState *s;
1565
1566    s = migrate_get_current();
1567
1568    return s->parameters.compress_threads;
1569}
1570
1571int migrate_decompress_threads(void)
1572{
1573    MigrationState *s;
1574
1575    s = migrate_get_current();
1576
1577    return s->parameters.decompress_threads;
1578}
1579
1580bool migrate_dirty_bitmaps(void)
1581{
1582    MigrationState *s;
1583
1584    s = migrate_get_current();
1585
1586    return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
1587}
1588
1589bool migrate_use_events(void)
1590{
1591    MigrationState *s;
1592
1593    s = migrate_get_current();
1594
1595    return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
1596}
1597
1598bool migrate_use_multifd(void)
1599{
1600    MigrationState *s;
1601
1602    s = migrate_get_current();
1603
1604    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD];
1605}
1606
1607bool migrate_pause_before_switchover(void)
1608{
1609    MigrationState *s;
1610
1611    s = migrate_get_current();
1612
1613    return s->enabled_capabilities[
1614        MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
1615}
1616
1617int migrate_multifd_channels(void)
1618{
1619    MigrationState *s;
1620
1621    s = migrate_get_current();
1622
1623    return s->parameters.x_multifd_channels;
1624}
1625
1626int migrate_multifd_page_count(void)
1627{
1628    MigrationState *s;
1629
1630    s = migrate_get_current();
1631
1632    return s->parameters.x_multifd_page_count;
1633}
1634
1635int migrate_use_xbzrle(void)
1636{
1637    MigrationState *s;
1638
1639    s = migrate_get_current();
1640
1641    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
1642}
1643
1644int64_t migrate_xbzrle_cache_size(void)
1645{
1646    MigrationState *s;
1647
1648    s = migrate_get_current();
1649
1650    return s->parameters.xbzrle_cache_size;
1651}
1652
1653bool migrate_use_block(void)
1654{
1655    MigrationState *s;
1656
1657    s = migrate_get_current();
1658
1659    return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
1660}
1661
1662bool migrate_use_return_path(void)
1663{
1664    MigrationState *s;
1665
1666    s = migrate_get_current();
1667
1668    return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
1669}
1670
1671bool migrate_use_block_incremental(void)
1672{
1673    MigrationState *s;
1674
1675    s = migrate_get_current();
1676
1677    return s->parameters.block_incremental;
1678}
1679
1680/* migration thread support */
1681/*
1682 * Something bad happened to the RP stream, mark an error
1683 * The caller shall print or trace something to indicate why
1684 */
1685static void mark_source_rp_bad(MigrationState *s)
1686{
1687    s->rp_state.error = true;
1688}
1689
1690static struct rp_cmd_args {
1691    ssize_t     len; /* -1 = variable */
1692    const char *name;
1693} rp_cmd_args[] = {
1694    [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
1695    [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
1696    [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
1697    [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
1698    [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
1699    [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
1700};
1701
1702/*
1703 * Process a request for pages received on the return path,
1704 * We're allowed to send more than requested (e.g. to round to our page size)
1705 * and we don't need to send pages that have already been sent.
1706 */
1707static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
1708                                       ram_addr_t start, size_t len)
1709{
1710    long our_host_ps = getpagesize();
1711
1712    trace_migrate_handle_rp_req_pages(rbname, start, len);
1713
1714    /*
1715     * Since we currently insist on matching page sizes, just sanity check
1716     * we're being asked for whole host pages.
1717     */
1718    if (start & (our_host_ps-1) ||
1719       (len & (our_host_ps-1))) {
1720        error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
1721                     " len: %zd", __func__, start, len);
1722        mark_source_rp_bad(ms);
1723        return;
1724    }
1725
1726    if (ram_save_queue_pages(rbname, start, len)) {
1727        mark_source_rp_bad(ms);
1728    }
1729}
1730
1731/*
1732 * Handles messages sent on the return path towards the source VM
1733 *
1734 */
1735static void *source_return_path_thread(void *opaque)
1736{
1737    MigrationState *ms = opaque;
1738    QEMUFile *rp = ms->rp_state.from_dst_file;
1739    uint16_t header_len, header_type;
1740    uint8_t buf[512];
1741    uint32_t tmp32, sibling_error;
1742    ram_addr_t start = 0; /* =0 to silence warning */
1743    size_t  len = 0, expected_len;
1744    int res;
1745
1746    trace_source_return_path_thread_entry();
1747    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
1748           migration_is_setup_or_active(ms->state)) {
1749        trace_source_return_path_thread_loop_top();
1750        header_type = qemu_get_be16(rp);
1751        header_len = qemu_get_be16(rp);
1752
1753        if (qemu_file_get_error(rp)) {
1754            mark_source_rp_bad(ms);
1755            goto out;
1756        }
1757
1758        if (header_type >= MIG_RP_MSG_MAX ||
1759            header_type == MIG_RP_MSG_INVALID) {
1760            error_report("RP: Received invalid message 0x%04x length 0x%04x",
1761                    header_type, header_len);
1762            mark_source_rp_bad(ms);
1763            goto out;
1764        }
1765
1766        if ((rp_cmd_args[header_type].len != -1 &&
1767            header_len != rp_cmd_args[header_type].len) ||
1768            header_len > sizeof(buf)) {
1769            error_report("RP: Received '%s' message (0x%04x) with"
1770                    "incorrect length %d expecting %zu",
1771                    rp_cmd_args[header_type].name, header_type, header_len,
1772                    (size_t)rp_cmd_args[header_type].len);
1773            mark_source_rp_bad(ms);
1774            goto out;
1775        }
1776
1777        /* We know we've got a valid header by this point */
1778        res = qemu_get_buffer(rp, buf, header_len);
1779        if (res != header_len) {
1780            error_report("RP: Failed reading data for message 0x%04x"
1781                         " read %d expected %d",
1782                         header_type, res, header_len);
1783            mark_source_rp_bad(ms);
1784            goto out;
1785        }
1786
1787        /* OK, we have the message and the data */
1788        switch (header_type) {
1789        case MIG_RP_MSG_SHUT:
1790            sibling_error = ldl_be_p(buf);
1791            trace_source_return_path_thread_shut(sibling_error);
1792            if (sibling_error) {
1793                error_report("RP: Sibling indicated error %d", sibling_error);
1794                mark_source_rp_bad(ms);
1795            }
1796            /*
1797             * We'll let the main thread deal with closing the RP
1798             * we could do a shutdown(2) on it, but we're the only user
1799             * anyway, so there's nothing gained.
1800             */
1801            goto out;
1802
1803        case MIG_RP_MSG_PONG:
1804            tmp32 = ldl_be_p(buf);
1805            trace_source_return_path_thread_pong(tmp32);
1806            break;
1807
1808        case MIG_RP_MSG_REQ_PAGES:
1809            start = ldq_be_p(buf);
1810            len = ldl_be_p(buf + 8);
1811            migrate_handle_rp_req_pages(ms, NULL, start, len);
1812            break;
1813
1814        case MIG_RP_MSG_REQ_PAGES_ID:
1815            expected_len = 12 + 1; /* header + termination */
1816
1817            if (header_len >= expected_len) {
1818                start = ldq_be_p(buf);
1819                len = ldl_be_p(buf + 8);
1820                /* Now we expect an idstr */
1821                tmp32 = buf[12]; /* Length of the following idstr */
1822                buf[13 + tmp32] = '\0';
1823                expected_len += tmp32;
1824            }
1825            if (header_len != expected_len) {
1826                error_report("RP: Req_Page_id with length %d expecting %zd",
1827                        header_len, expected_len);
1828                mark_source_rp_bad(ms);
1829                goto out;
1830            }
1831            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
1832            break;
1833
1834        default:
1835            break;
1836        }
1837    }
1838    if (qemu_file_get_error(rp)) {
1839        trace_source_return_path_thread_bad_end();
1840        mark_source_rp_bad(ms);
1841    }
1842
1843    trace_source_return_path_thread_end();
1844out:
1845    ms->rp_state.from_dst_file = NULL;
1846    qemu_fclose(rp);
1847    return NULL;
1848}
1849
1850static int open_return_path_on_source(MigrationState *ms)
1851{
1852
1853    ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
1854    if (!ms->rp_state.from_dst_file) {
1855        return -1;
1856    }
1857
1858    trace_open_return_path_on_source();
1859    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
1860                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
1861
1862    trace_open_return_path_on_source_continue();
1863
1864    return 0;
1865}
1866
1867/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
1868static int await_return_path_close_on_source(MigrationState *ms)
1869{
1870    /*
1871     * If this is a normal exit then the destination will send a SHUT and the
1872     * rp_thread will exit, however if there's an error we need to cause
1873     * it to exit.
1874     */
1875    if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
1876        /*
1877         * shutdown(2), if we have it, will cause it to unblock if it's stuck
1878         * waiting for the destination.
1879         */
1880        qemu_file_shutdown(ms->rp_state.from_dst_file);
1881        mark_source_rp_bad(ms);
1882    }
1883    trace_await_return_path_close_on_source_joining();
1884    qemu_thread_join(&ms->rp_state.rp_thread);
1885    trace_await_return_path_close_on_source_close();
1886    return ms->rp_state.error;
1887}
1888
/*
 * Switch from normal iteration to postcopy.
 *
 * Stops the VM, sends the remaining non-postcopiable state, then wraps the
 * device state into a single package that is sent on ms->to_dst_file.
 * The iothread lock is taken inside this function and released again on
 * every return path.
 *
 * Returns 0 on success; non-0 on error (the stream error on the late
 * check, -1 on the failure paths).
 */
static int postcopy_start(MigrationState *ms)
{
    int ret;
    QIOChannelBuffer *bioc;
    QEMUFile *fb;
    /* Start of the downtime measurement stored in ms->downtime below */
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    /*
     * True from the point bdrv_inactivate_all() succeeded until just before
     * the package is sent; the failure path uses it to decide whether to
     * call bdrv_invalidate_cache_all().
     */
    bool restart_block = false;
    int cur_state = MIGRATION_STATUS_ACTIVE;
    /*
     * Without pause-before-switchover the state moves to POSTCOPY_ACTIVE
     * right away; otherwise migration_maybe_pause() below performs the
     * transition.
     */
    if (!migrate_pause_before_switchover()) {
        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
    }

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
    }

    ret = migration_maybe_pause(ms, &cur_state,
                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_inactivate_all();
    if (ret < 0) {
        goto fail;
    }
    restart_block = true;

    /*
     * Cause any non-postcopiable, but iterative devices to
     * send out their final data.
     */
    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);

    /*
     * in Finish migrate and with the io-lock held everything should
     * be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw any pages it's already received
     * that are dirty
     */
    if (migrate_postcopy_ram()) {
        if (ram_postcopy_send_discard_bitmap(ms)) {
            error_report("postcopy send discard bitmap failed");
            goto fail;
        }
    }

    /*
     * send rest of state - note things that are doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here
     */
    qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it.  Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each devices load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
    /*
     * Drop our own reference to the buffer channel.
     * NOTE(review): bioc->data/usage are still read below, so fb presumably
     * keeps the channel alive until qemu_fclose(fb) - confirm.
     */
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /* Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Let's just check again we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    /* From here on a failure no longer calls bdrv_invalidate_cache_all() */
    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /* Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in particular
     * spice needs to trigger a transition now
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    /* Late check: the iothread lock is already released at this point */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                              MIGRATION_STATUS_FAILED);
    }

    return ret;

fail_closefb:
    qemu_fclose(fb);
fail:
    /*
     * NOTE(review): this names POSTCOPY_ACTIVE as the old state, but with
     * pause-before-switchover the state is only moved there by
     * migration_maybe_pause(); presumably an earlier failure leaves the
     * state untouched here - confirm against migrate_set_state().
     */
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /* A failure happened early enough that we know the destination hasn't
         * accessed block devices, so we're safe to recover.
         */
        Error *local_err = NULL;

        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    qemu_mutex_unlock_iothread();
    return -1;
}
2059
2060/**
2061 * migration_maybe_pause: Pause if required to by
2062 * migrate_pause_before_switchover called with the iothread locked
2063 * Returns: 0 on success
2064 */
2065static int migration_maybe_pause(MigrationState *s,
2066                                 int *current_active_state,
2067                                 int new_state)
2068{
2069    if (!migrate_pause_before_switchover()) {
2070        return 0;
2071    }
2072
2073    /* Since leaving this state is not atomic with posting the semaphore
2074     * it's possible that someone could have issued multiple migrate_continue
2075     * and the semaphore is incorrectly positive at this point;
2076     * the docs say it's undefined to reinit a semaphore that's already
2077     * init'd, so use timedwait to eat up any existing posts.
2078     */
2079    while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
2080        /* This block intentionally left blank */
2081    }
2082
2083    qemu_mutex_unlock_iothread();
2084    migrate_set_state(&s->state, *current_active_state,
2085                      MIGRATION_STATUS_PRE_SWITCHOVER);
2086    qemu_sem_wait(&s->pause_sem);
2087    migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
2088                      new_state);
2089    *current_active_state = new_state;
2090    qemu_mutex_lock_iothread();
2091
2092    return s->state == new_state ? 0 : -EINVAL;
2093}
2094
/**
 * migration_completion: Used by migration_thread when there's not much left.
 *   The caller 'breaks' the loop when this returns.
 *
 *   On success the state is moved to MIGRATION_STATUS_COMPLETED (unless COLO
 *   is enabled); on failure it is moved to MIGRATION_STATUS_FAILED.
 *
 * @s: Current migration state
 */
static void migration_completion(MigrationState *s)
{
    int ret;
    /* State we expect to be leaving in the final migrate_set_state() call */
    int current_active_state = s->state;

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        /* Precopy: stop the VM and send the remaining state under the lock */
        qemu_mutex_lock_iothread();
        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
        s->vm_was_running = runstate_is_running();
        ret = global_state_store();

        if (!ret) {
            bool inactivate = !migrate_colo_enabled();
            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            if (ret >= 0) {
                ret = migration_maybe_pause(s, &current_active_state,
                                            MIGRATION_STATUS_DEVICE);
            }
            if (ret >= 0) {
                qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
                ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
                                                         inactivate);
            }
            if (inactivate && ret >= 0) {
                s->block_inactive = true;
            }
        }
        qemu_mutex_unlock_iothread();

        if (ret < 0) {
            goto fail;
        }
    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        trace_migration_completion_postcopy_end();

        qemu_savevm_state_complete_postcopy(s->to_dst_file);
        trace_migration_completion_postcopy_end_after_complete();
    }

    /*
     * If rp was opened we must clean up the thread before
     * cleaning everything else up (since if there are no failures
     * it will wait for the destination to send its status in
     * a SHUT command).
     */
    if (s->rp_state.from_dst_file) {
        int rp_error;
        trace_migration_return_path_end_before();
        rp_error = await_return_path_close_on_source(s);
        trace_migration_return_path_end_after(rp_error);
        if (rp_error) {
            goto fail_invalidate;
        }
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail_invalidate;
    }

    /* With COLO enabled the state is left as it is here */
    if (!migrate_colo_enabled()) {
        migrate_set_state(&s->state, current_active_state,
                          MIGRATION_STATUS_COMPLETED);
    }

    return;

fail_invalidate:
    /* If not doing postcopy, vm_start() will be called: let's regain
     * control on images.
     */
    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_DEVICE) {
        Error *local_err = NULL;

        qemu_mutex_lock_iothread();
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            /* Only cleared when bdrv_invalidate_cache_all() reported no error */
            s->block_inactive = false;
        }
        qemu_mutex_unlock_iothread();
    }

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}
2191
2192bool migrate_colo_enabled(void)
2193{
2194    MigrationState *s = migrate_get_current();
2195    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
2196}
2197
2198static void migration_calculate_complete(MigrationState *s)
2199{
2200    uint64_t bytes = qemu_ftell(s->to_dst_file);
2201    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2202
2203    s->total_time = end_time - s->start_time;
2204    if (!s->downtime) {
2205        /*
2206         * It's still not set, so we are precopy migration.  For
2207         * postcopy, downtime is calculated during postcopy_start().
2208         */
2209        s->downtime = end_time - s->downtime_start;
2210    }
2211
2212    if (s->total_time) {
2213        s->mbps = ((double) bytes * 8.0) / s->total_time / 1000;
2214    }
2215}
2216
2217static void migration_update_counters(MigrationState *s,
2218                                      int64_t current_time)
2219{
2220    uint64_t transferred, time_spent;
2221    double bandwidth;
2222
2223    if (current_time < s->iteration_start_time + BUFFER_DELAY) {
2224        return;
2225    }
2226
2227    transferred = qemu_ftell(s->to_dst_file) - s->iteration_initial_bytes;
2228    time_spent = current_time - s->iteration_start_time;
2229    bandwidth = (double)transferred / time_spent;
2230    s->threshold_size = bandwidth * s->parameters.downtime_limit;
2231
2232    s->mbps = (((double) transferred * 8.0) /
2233               ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
2234
2235    /*
2236     * if we haven't sent anything, we don't want to
2237     * recalculate. 10000 is a small enough number for our purposes
2238     */
2239    if (ram_counters.dirty_pages_rate && transferred > 10000) {
2240        s->expected_downtime = ram_counters.dirty_pages_rate *
2241            qemu_target_page_size() / bandwidth;
2242    }
2243
2244    qemu_file_reset_rate_limit(s->to_dst_file);
2245
2246    s->iteration_start_time = current_time;
2247    s->iteration_initial_bytes = qemu_ftell(s->to_dst_file);
2248
2249    trace_migrate_transferred(transferred, time_spent,
2250                              bandwidth, s->threshold_size);
2251}
2252
/* Outcome of one migration_iteration_run() call in the migration thread */
typedef enum {
    /* Resume current iteration */
    MIG_ITERATE_RESUME = 0,
    /* Skip current iteration */
    MIG_ITERATE_SKIP = 1,
    /* Break the loop */
    MIG_ITERATE_BREAK = 2,
} MigIterateState;
2259
2260/*
2261 * Return true if continue to the next iteration directly, false
2262 * otherwise.
2263 */
2264static MigIterateState migration_iteration_run(MigrationState *s)
2265{
2266    uint64_t pending_size, pend_pre, pend_compat, pend_post;
2267    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
2268
2269    qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
2270                              &pend_compat, &pend_post);
2271    pending_size = pend_pre + pend_compat + pend_post;
2272
2273    trace_migrate_pending(pending_size, s->threshold_size,
2274                          pend_pre, pend_compat, pend_post);
2275
2276    if (pending_size && pending_size >= s->threshold_size) {
2277        /* Still a significant amount to transfer */
2278        if (migrate_postcopy() && !in_postcopy &&
2279            pend_pre <= s->threshold_size &&
2280            atomic_read(&s->start_postcopy)) {
2281            if (postcopy_start(s)) {
2282                error_report("%s: postcopy failed to start", __func__);
2283            }
2284            return MIG_ITERATE_SKIP;
2285        }
2286        /* Just another iteration step */
2287        qemu_savevm_state_iterate(s->to_dst_file,
2288            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
2289    } else {
2290        trace_migration_thread_low_pending(pending_size);
2291        migration_completion(s);
2292        return MIG_ITERATE_BREAK;
2293    }
2294
2295    return MIG_ITERATE_RESUME;
2296}
2297
/*
 * Called by migration_thread once its main loop has ended: acts on the
 * final migration state under the iothread lock and schedules the cleanup
 * bottom half.
 */
static void migration_iteration_finish(MigrationState *s)
{
    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();

    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        runstate_set(RUN_STATE_POSTMIGRATE);
        break;

    case MIGRATION_STATUS_ACTIVE:
        /*
         * We should really assert here, but since it's during
         * migration, let's try to reduce the usage of assertions.
         */
        if (!migrate_colo_enabled()) {
            error_report("%s: critical error: calling COLO code without "
                         "COLO enabled", __func__);
        }
        /* Note: the COLO process is started even after the error above */
        migrate_start_colo_process(s);
        /*
         * Fixme: we will run VM in COLO no matter its old running state.
         * After exited COLO, we will keep running.
         */
        s->vm_was_running = true;
        /* Fallthrough */
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
        if (s->vm_was_running) {
            vm_start();
        } else {
            /* VM was not running before: only leave FINISH_MIGRATE */
            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
                runstate_set(RUN_STATE_POSTMIGRATE);
            }
        }
        break;

    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }
    /* The cleanup bottom half is scheduled whatever the final state was */
    qemu_bh_schedule(s->cleanup_bh);
    qemu_mutex_unlock_iothread();
}
2345
/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
 *
 * @opaque: the MigrationState of the migration to run
 *
 * Always returns NULL.
 */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);

    rcu_register_thread();

    /* Start of the first window used by migration_update_counters() */
    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    qemu_savevm_state_header(s->to_dst_file);

    /*
     * If we opened the return path, we need to make sure dst has it
     * opened as well.
     */
    if (s->rp_state.from_dst_file) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->to_dst_file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->to_dst_file, 1);
    }

    if (migrate_postcopy()) {
        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->to_dst_file);
    }

    qemu_savevm_state_setup(s->to_dst_file);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_ACTIVE);

    trace_migration_thread_setup_complete();

    /* Main loop: runs for as long as the migration is in an active state */
    while (s->state == MIGRATION_STATUS_ACTIVE ||
           s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        int64_t current_time;

        /* Only do migration work while the rate limit is not exceeded */
        if (!qemu_file_rate_limit(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
                continue;
            } else if (iter_state == MIG_ITERATE_BREAK) {
                break;
            }
        }

        if (qemu_file_get_error(s->to_dst_file)) {
            if (migration_is_setup_or_active(s->state)) {
                migrate_set_state(&s->state, s->state,
                                  MIGRATION_STATUS_FAILED);
            }
            trace_migration_thread_file_err();
            break;
        }

        current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

        migration_update_counters(s, current_time);

        /* Rate limit hit: sleep until the current window is over */
        if (qemu_file_rate_limit(s->to_dst_file)) {
            /* usleep expects microseconds */
            g_usleep((s->iteration_start_time + BUFFER_DELAY -
                      current_time) * 1000);
        }
    }

    trace_migration_thread_after_loop();
    migration_iteration_finish(s);
    rcu_unregister_thread();
    return NULL;
}
2428
2429void migrate_fd_connect(MigrationState *s, Error *error_in)
2430{
2431    s->expected_downtime = s->parameters.downtime_limit;
2432    s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
2433    if (error_in) {
2434        migrate_fd_error(s, error_in);
2435        migrate_fd_cleanup(s);
2436        return;
2437    }
2438
2439    qemu_file_set_blocking(s->to_dst_file, true);
2440    qemu_file_set_rate_limit(s->to_dst_file,
2441                             s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
2442
2443    /* Notify before starting migration thread */
2444    notifier_list_notify(&migration_state_notifiers, s);
2445
2446    /*
2447     * Open the return path. For postcopy, it is used exclusively. For
2448     * precopy, only if user specified "return-path" capability would
2449     * QEMU uses the return path.
2450     */
2451    if (migrate_postcopy_ram() || migrate_use_return_path()) {
2452        if (open_return_path_on_source(s)) {
2453            error_report("Unable to open return-path for postcopy");
2454            migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2455                              MIGRATION_STATUS_FAILED);
2456            migrate_fd_cleanup(s);
2457            return;
2458        }
2459    }
2460
2461    if (multifd_save_setup() != 0) {
2462        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2463                          MIGRATION_STATUS_FAILED);
2464        migrate_fd_cleanup(s);
2465        return;
2466    }
2467    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
2468                       QEMU_THREAD_JOINABLE);
2469    s->migration_thread_running = true;
2470}
2471
2472void migration_global_dump(Monitor *mon)
2473{
2474    MigrationState *ms = migrate_get_current();
2475
2476    monitor_printf(mon, "globals:\n");
2477    monitor_printf(mon, "store-global-state: %s\n",
2478                   ms->store_global_state ? "on" : "off");
2479    monitor_printf(mon, "only-migratable: %s\n",
2480                   ms->only_migratable ? "on" : "off");
2481    monitor_printf(mon, "send-configuration: %s\n",
2482                   ms->send_configuration ? "on" : "off");
2483    monitor_printf(mon, "send-section-footer: %s\n",
2484                   ms->send_section_footer ? "on" : "off");
2485}
2486
/*
 * Define a boolean property @name that is backed by entry @x of
 * MigrationState.enabled_capabilities and defaults to false.
 */
#define DEFINE_PROP_MIG_CAP(name, x)             \
    DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)
2489
/*
 * Properties of the migration object (installed as dc->props by
 * migration_class_init()).  Each entry maps a property name to a field of
 * MigrationState together with its default value.
 */
static Property migration_properties[] = {
    /* Global settings */
    DEFINE_PROP_BOOL("store-global-state", MigrationState,
                     store_global_state, true),
    DEFINE_PROP_BOOL("only-migratable", MigrationState, only_migratable, false),
    DEFINE_PROP_BOOL("send-configuration", MigrationState,
                     send_configuration, true),
    DEFINE_PROP_BOOL("send-section-footer", MigrationState,
                     send_section_footer, true),

    /* Migration parameters */
    DEFINE_PROP_UINT8("x-compress-level", MigrationState,
                      parameters.compress_level,
                      DEFAULT_MIGRATE_COMPRESS_LEVEL),
    DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
                      parameters.compress_threads,
                      DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
    DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                      parameters.decompress_threads,
                      DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
    DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
                      parameters.cpu_throttle_initial,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
    DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                      parameters.cpu_throttle_increment,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
    DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                      parameters.max_bandwidth, MAX_THROTTLE),
    DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
                      parameters.downtime_limit,
                      DEFAULT_MIGRATE_SET_DOWNTIME),
    DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
                      parameters.x_checkpoint_delay,
                      DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
    DEFINE_PROP_UINT8("x-multifd-channels", MigrationState,
                      parameters.x_multifd_channels,
                      DEFAULT_MIGRATE_MULTIFD_CHANNELS),
    DEFINE_PROP_UINT32("x-multifd-page-count", MigrationState,
                      parameters.x_multifd_page_count,
                      DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT),
    DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
                      parameters.xbzrle_cache_size,
                      DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),

    /* Migration capabilities (all default to false, see DEFINE_PROP_MIG_CAP) */
    DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
    DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
    DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
    DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
    DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
    DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
    DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
    DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
    DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
    DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
    DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
    DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_X_MULTIFD),

    DEFINE_PROP_END_OF_LIST(),
};
2549
2550static void migration_class_init(ObjectClass *klass, void *data)
2551{
2552    DeviceClass *dc = DEVICE_CLASS(klass);
2553
2554    dc->user_creatable = false;
2555    dc->props = migration_properties;
2556}
2557
2558static void migration_instance_finalize(Object *obj)
2559{
2560    MigrationState *ms = MIGRATION_OBJ(obj);
2561    MigrationParameters *params = &ms->parameters;
2562
2563    qemu_mutex_destroy(&ms->error_mutex);
2564    g_free(params->tls_hostname);
2565    g_free(params->tls_creds);
2566    qemu_sem_destroy(&ms->pause_sem);
2567    error_free(ms->error);
2568}
2569
2570static void migration_instance_init(Object *obj)
2571{
2572    MigrationState *ms = MIGRATION_OBJ(obj);
2573    MigrationParameters *params = &ms->parameters;
2574
2575    ms->state = MIGRATION_STATUS_NONE;
2576    ms->mbps = -1;
2577    qemu_sem_init(&ms->pause_sem, 0);
2578    qemu_mutex_init(&ms->error_mutex);
2579
2580    params->tls_hostname = g_strdup("");
2581    params->tls_creds = g_strdup("");
2582
2583    /* Set has_* up only for parameter checks */
2584    params->has_compress_level = true;
2585    params->has_compress_threads = true;
2586    params->has_decompress_threads = true;
2587    params->has_cpu_throttle_initial = true;
2588    params->has_cpu_throttle_increment = true;
2589    params->has_max_bandwidth = true;
2590    params->has_downtime_limit = true;
2591    params->has_x_checkpoint_delay = true;
2592    params->has_block_incremental = true;
2593    params->has_x_multifd_channels = true;
2594    params->has_x_multifd_page_count = true;
2595    params->has_xbzrle_cache_size = true;
2596}
2597
2598/*
2599 * Return true if check pass, false otherwise. Error will be put
2600 * inside errp if provided.
2601 */
2602static bool migration_object_check(MigrationState *ms, Error **errp)
2603{
2604    MigrationCapabilityStatusList *head = NULL;
2605    /* Assuming all off */
2606    bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
2607    int i;
2608
2609    if (!migrate_params_check(&ms->parameters, errp)) {
2610        return false;
2611    }
2612
2613    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
2614        if (ms->enabled_capabilities[i]) {
2615            head = migrate_cap_add(head, i, true);
2616        }
2617    }
2618
2619    ret = migrate_caps_check(cap_list, head, errp);
2620
2621    /* It works with head == NULL */
2622    qapi_free_MigrationCapabilityStatusList(head);
2623
2624    return ret;
2625}
2626
2627static const TypeInfo migration_type = {
2628    .name = TYPE_MIGRATION,
2629    /*
2630     * NOTE: TYPE_MIGRATION is not really a device, as the object is
2631     * not created using qdev_create(), it is not attached to the qdev
2632     * device tree, and it is never realized.
2633     *
2634     * TODO: Make this TYPE_OBJECT once QOM provides something like
2635     * TYPE_DEVICE's "-global" properties.
2636     */
2637    .parent = TYPE_DEVICE,
2638    .class_init = migration_class_init,
2639    .class_size = sizeof(MigrationClass),
2640    .instance_size = sizeof(MigrationState),
2641    .instance_init = migration_instance_init,
2642    .instance_finalize = migration_instance_finalize,
2643};
2644
/* Register the migration type described by migration_type */
static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

/* Hook register_migration_types() into the type_init() mechanism */
type_init(register_migration_types);
2651