qemu/migration/migration.c
/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/qemu-file.h"
#include "sysemu/sysemu.h"
#include "block/block.h"
#include "qapi/qmp/qerror.h"
#include "qemu/sockets.h"
#include "qemu/rcu.h"
#include "migration/block.h"
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
#include "qapi/util.h"
#include "qapi-event.h"

#define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression.*/
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/*0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

static bool deferred_incoming;

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    static MigrationState current_migration = {
        .state = MIGRATION_STATUS_NONE,
        .bandwidth_limit = MAX_THROTTLE,
        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
        .mbps = -1,
        .parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] =
                DEFAULT_MIGRATE_COMPRESS_LEVEL,
        .parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
                DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
        .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
                DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
    };

    return &current_migration;
}

/* For incoming */
static MigrationIncomingState *mis_current;

MigrationIncomingState *migration_incoming_get_current(void)
{
    return mis_current;
}

MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
{
    mis_current = g_malloc0(sizeof(MigrationIncomingState));
    mis_current->file = f;
    QLIST_INIT(&mis_current->loadvm_handlers);

    return mis_current;
}

void migration_incoming_state_destroy(void)
{
    loadvm_free_handlers(mis_current);
    g_free(mis_current);
    mis_current = NULL;
}


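/*
 * The "globalstate" section carries the source VM's run state name across
 * the migration stream so the destination can restore it, or fall back to
 * obeying -S/autostart when the source was running or the section was not
 * received at all.
 */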
typedef struct {
    bool optional;
    uint32_t size;
    uint8_t runstate[100];
    RunState state;
    bool received;
} GlobalState;

static GlobalState global_state;

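/*
 * Snapshot the current run state name into global_state.runstate so it can
 * be sent in the "globalstate" section; returns -EINVAL if the name does
 * not fit in the buffer.
 */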
int global_state_store(void)
{
    if (!runstate_store((char *)global_state.runstate,
                        sizeof(global_state.runstate))) {
        error_report("runstate name too big: %s", global_state.runstate);
        trace_migrate_state_too_big();
        return -EINVAL;
    }
    return 0;
}

void global_state_store_running(void)
{
    const char *state = RunState_lookup[RUN_STATE_RUNNING];
    strncpy((char *)global_state.runstate,
           state, sizeof(global_state.runstate));
}

static bool global_state_received(void)
{
    return global_state.received;
}

static RunState global_state_get_runstate(void)
{
    return global_state.state;
}

void global_state_set_optional(void)
{
    global_state.optional = true;
}

static bool global_state_needed(void *opaque)
{
    GlobalState *s = opaque;
    char *runstate = (char *)s->runstate;

    /* If it is not optional, it is mandatory */

    if (s->optional == false) {
        return true;
    }

    /* If state is running or paused, it is not needed */

    if (strcmp(runstate, "running") == 0 ||
        strcmp(runstate, "paused") == 0) {
        return false;
    }

    /* for any other state it is needed */
    return true;
}

static int global_state_post_load(void *opaque, int version_id)
{
    GlobalState *s = opaque;
    Error *local_err = NULL;
    int r;
    char *runstate = (char *)s->runstate;

    s->received = true;
    trace_migrate_global_state_post_load(runstate);

    r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
                                -1, &local_err);

    if (r == -1) {
        if (local_err) {
            error_report_err(local_err);
        }
        return -EINVAL;
    }
    s->state = r;

    return 0;
}

static void global_state_pre_save(void *opaque)
{
    GlobalState *s = opaque;

    trace_migrate_global_state_pre_save((char *)s->runstate);
    s->size = strlen((char *)s->runstate) + 1;
}

static const VMStateDescription vmstate_globalstate = {
    .name = "globalstate",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = global_state_post_load,
    .pre_save = global_state_pre_save,
    .needed = global_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(size, GlobalState),
        VMSTATE_BUFFER(runstate, GlobalState),
        VMSTATE_END_OF_LIST()
    },
};

void register_global_state(void)
{
    /* We use the runstate buffer whether or not we receive it */
    strcpy((char *)&global_state.runstate, "");
    global_state.received = false;
    vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state, &error_abort);
    }
}

/*
 * Called on -incoming with a defer: uri.
 * The migration can be started later after any parameters have been
 * changed.
 */
static void deferred_incoming_migration(Error **errp)
{
    if (deferred_incoming) {
        error_setg(errp, "Incoming migration already deferred");
    }
    deferred_incoming = true;
}

void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p;

    qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
    if (!strcmp(uri, "defer")) {
        deferred_incoming_migration(errp);
    } else if (strstart(uri, "tcp:", &p)) {
        tcp_start_incoming_migration(p, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
#if !defined(WIN32)
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
#endif
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}

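/*
 * Coroutine that runs the incoming side of a migration: reads the VM state
 * from the stream, makes block drivers revalidate their caches, and then
 * either restores the source's run state or obeys -S/autostart.  The
 * destination cannot continue after a failed load, so errors are fatal.
 */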
static void process_incoming_migration_co(void *opaque)
{
    QEMUFile *f = opaque;
    Error *local_err = NULL;
    int ret;

    migration_incoming_state_new(f);
    migrate_generate_event(MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(f);

    qemu_fclose(f);
    free_xbzrle_decoded_buf();
    migration_incoming_state_destroy();

    if (ret < 0) {
        migrate_generate_event(MIGRATION_STATUS_FAILED);
        error_report("load of migration failed: %s", strerror(-ret));
        migrate_decompress_threads_join();
        exit(EXIT_FAILURE);
    }
    qemu_announce_self();

    /* Make sure all file formats flush their mutable metadata */
    bdrv_invalidate_cache_all(&local_err);
    if (local_err) {
        migrate_generate_event(MIGRATION_STATUS_FAILED);
        error_report_err(local_err);
        migrate_decompress_threads_join();
        exit(EXIT_FAILURE);
    }

    /* If global state section was not received or we are in running
       state, we need to obey autostart. Any other state is set with
       runstate_set. */

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else {
        runstate_set(global_state_get_runstate());
    }
    migrate_decompress_threads_join();
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_generate_event(MIGRATION_STATUS_COMPLETED);
}

void process_incoming_migration(QEMUFile *f)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
    int fd = qemu_get_fd(f);

    assert(fd != -1);
    migrate_decompress_threads_create();
    qemu_set_nonblock(fd);
    qemu_coroutine_enter(co, f);
}

/* amount of nanoseconds we are willing to wait for migration to be down.
 * the choice of nanoseconds is because it is the maximum resolution that
 * get_clock() can achieve. It is an internal measure. All user-visible
 * units must be in seconds */
static uint64_t max_downtime = 300000000;

uint64_t migrate_max_downtime(void)
{
    return max_downtime;
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    MigrationCapabilityStatusList *caps;
    MigrationState *s = migrate_get_current();
    int i;

    caps = NULL; /* silence compiler warning */
    for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
        if (head == NULL) {
            head = g_malloc0(sizeof(*caps));
            caps = head;
        } else {
            caps->next = g_malloc0(sizeof(*caps));
            caps = caps->next;
        }
        caps->value =
            g_malloc(sizeof(*caps->value));
        caps->value->capability = i;
        caps->value->state = s->enabled_capabilities[i];
    }

    return head;
}

MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    params = g_malloc0(sizeof(*params));
    params->compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
    params->compress_threads =
            s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
    params->decompress_threads =
            s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];

    return params;
}

static void get_xbzrle_cache_stats(MigrationInfo *info)
{
    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred();
        info->xbzrle_cache->pages = xbzrle_mig_pages_transferred();
        info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss();
        info->xbzrle_cache->cache_miss_rate = xbzrle_mig_cache_miss_rate();
        info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow();
    }
}

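/*
 * Fill in a MigrationInfo for the 'query-migrate' QMP command.  Which
 * optional fields are populated depends on the current migration state.
 */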
MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        break;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
        info->has_status = true;
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
            - s->total_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        info->has_ram = true;
        info->ram = g_malloc0(sizeof(*info->ram));
        info->ram->transferred = ram_bytes_transferred();
        info->ram->remaining = ram_bytes_remaining();
        info->ram->total = ram_bytes_total();
        info->ram->duplicate = dup_mig_pages_transferred();
        info->ram->skipped = skipped_mig_pages_transferred();
        info->ram->normal = norm_mig_pages_transferred();
        info->ram->normal_bytes = norm_mig_bytes_transferred();
        info->ram->dirty_pages_rate = s->dirty_pages_rate;
        info->ram->mbps = s->mbps;
        info->ram->dirty_sync_count = s->dirty_sync_count;

        if (blk_mig_active()) {
            info->has_disk = true;
            info->disk = g_malloc0(sizeof(*info->disk));
            info->disk->transferred = blk_mig_bytes_transferred();
            info->disk->remaining = blk_mig_bytes_remaining();
            info->disk->total = blk_mig_bytes_total();
        }

        get_xbzrle_cache_stats(info);
        break;
    case MIGRATION_STATUS_COMPLETED:
        get_xbzrle_cache_stats(info);

        info->has_status = true;
        info->has_total_time = true;
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        info->has_ram = true;
        info->ram = g_malloc0(sizeof(*info->ram));
        info->ram->transferred = ram_bytes_transferred();
        info->ram->remaining = 0;
        info->ram->total = ram_bytes_total();
        info->ram->duplicate = dup_mig_pages_transferred();
        info->ram->skipped = skipped_mig_pages_transferred();
        info->ram->normal = norm_mig_pages_transferred();
        info->ram->normal_bytes = norm_mig_bytes_transferred();
        info->ram->mbps = s->mbps;
        info->ram->dirty_sync_count = s->dirty_sync_count;
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    }
    info->status = s->state;

    return info;
}

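/*
 * Handler for the 'migrate-set-capabilities' QMP command.  Capabilities
 * cannot be changed while a migration is in SETUP or ACTIVE state.
 */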
void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;

    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_SETUP) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}

void qmp_migrate_set_parameters(bool has_compress_level,
                                int64_t compress_level,
                                bool has_compress_threads,
                                int64_t compress_threads,
                                bool has_decompress_threads,
                                int64_t decompress_threads, Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (has_compress_level && (compress_level < 0 || compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "is invalid, it should be in the range of 0 to 9");
        return;
    }
    if (has_compress_threads &&
            (compress_threads < 1 || compress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return;
    }
    if (has_decompress_threads &&
            (decompress_threads < 1 || decompress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return;
    }

    if (has_compress_level) {
        s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
    }
    if (has_compress_threads) {
        s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = compress_threads;
    }
    if (has_decompress_threads) {
        s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
                                                    decompress_threads;
    }
}

/* shared migration helpers */

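/*
 * Atomically move the migration from old_state to new_state.  The trace and
 * the QMP event are only emitted when the compare-and-swap actually
 * succeeded, so a racing transition (e.g. a cancel) cannot be reported twice.
 */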
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
{
    if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
        trace_migrate_set_state(new_state);
        migrate_generate_event(new_state);
    }
}

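/*
 * Bottom half scheduled by the migration thread once it is done (whether the
 * migration completed, failed or was cancelled): joins the migration and
 * compression threads, closes the outgoing file, and settles the final
 * CANCELLING -> CANCELLED transition.
 */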
static void migrate_fd_cleanup(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    if (s->file) {
        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        qemu_thread_join(&s->thread);
        qemu_mutex_lock_iothread();

        migrate_compress_threads_join();
        qemu_fclose(s->file);
        s->file = NULL;
    }

    assert(s->state != MIGRATION_STATUS_ACTIVE);

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        qemu_savevm_state_cancel();
        if (s->state == MIGRATION_STATUS_CANCELLING) {
            migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
                              MIGRATION_STATUS_CANCELLED);
        }
    }

    notifier_list_notify(&migration_state_notifiers, s);
}

void migrate_fd_error(MigrationState *s)
{
    trace_migrate_fd_error();
    assert(s->file == NULL);
    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
    notifier_list_notify(&migration_state_notifiers, s);
}

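/*
 * Request cancellation of an outgoing migration.  Spins the state to
 * CANCELLING (unless it has already left SETUP/ACTIVE) and then shuts the
 * stream down so a sender blocked on a dead network wakes up instead of
 * waiting for a timeout.
 */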
static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
    QEMUFile *f = migrate_get_current()->file;
    trace_migrate_fd_cancel();

    do {
        old_state = s->state;
        if (old_state != MIGRATION_STATUS_SETUP &&
            old_state != MIGRATION_STATUS_ACTIVE) {
            break;
        }
        migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
}

void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}

bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

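/*
 * Reset the singleton MigrationState for a new outgoing migration while
 * preserving the knobs the user has already configured: enabled
 * capabilities, XBZRLE cache size, compression parameters and the
 * bandwidth limit.
 */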
static MigrationState *migrate_init(const MigrationParams *params)
{
    MigrationState *s = migrate_get_current();
    int64_t bandwidth_limit = s->bandwidth_limit;
    bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
    int64_t xbzrle_cache_size = s->xbzrle_cache_size;
    int compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
    int compress_thread_count =
            s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
    int decompress_thread_count =
            s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];

    memcpy(enabled_capabilities, s->enabled_capabilities,
           sizeof(enabled_capabilities));

    memset(s, 0, sizeof(*s));
    s->params = *params;
    memcpy(s->enabled_capabilities, enabled_capabilities,
           sizeof(enabled_capabilities));
    s->xbzrle_cache_size = xbzrle_cache_size;

    s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
    s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
               compress_thread_count;
    s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
               decompress_thread_count;
    s->bandwidth_limit = bandwidth_limit;
    migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    return s;
}

static GSList *migration_blockers;

void migrate_add_blocker(Error *reason)
{
    migration_blockers = g_slist_prepend(migration_blockers, reason);
}

void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}

void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
    static bool once = true;

    if (!deferred_incoming) {
        error_setg(errp, "For use with '-incoming defer'");
        return;
    }
    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        return;
    }

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    once = false;
}

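/*
 * Handler for the 'migrate' command: checks that no migration is already in
 * flight and that nothing blocks migration, resets the migration state, and
 * starts the outgoing transport matching the URI scheme.
 */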
void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    MigrationParams params;
    const char *p;

    params.blk = has_blk && blk;
    params.shared = has_inc && inc;

    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_SETUP ||
        s->state == MIGRATION_STATUS_CANCELLING) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return;
    }

    if (qemu_savevm_state_blocked(errp)) {
        return;
    }

    if (migration_blockers) {
        *errp = error_copy(migration_blockers->data);
        return;
    }

    /* We are starting a new migration, so we want to start in a clean
       state.  This change is only needed if previous migration
       failed/was cancelled.  We don't use migrate_set_state() because
       we are setting the initial state, not changing it. */
    s->state = MIGRATION_STATUS_NONE;

    s = migrate_init(&params);

    if (strstart(uri, "tcp:", &p)) {
        tcp_start_outgoing_migration(s, p, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
#if !defined(WIN32)
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
#endif
    } else {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
        return;
    }

    if (local_err) {
        migrate_fd_error(s);
        error_propagate(errp, local_err);
        return;
    }
}

void qmp_migrate_cancel(Error **errp)
{
    migrate_fd_cancel(migrate_get_current());
}

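/*
 * Handler for 'migrate-set-cache-size': resize the XBZRLE cache, rejecting
 * values that would be truncated, exceed guest RAM, or end up smaller than
 * the page size.
 */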
void qmp_migrate_set_cache_size(int64_t value, Error **errp)
{
    MigrationState *s = migrate_get_current();
    int64_t new_size;

    /* Check for truncation */
    if (value != (size_t)value) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return;
    }

    /* Cache should not be larger than guest ram size */
    if (value > ram_bytes_total()) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeds guest ram size ");
        return;
    }

    new_size = xbzrle_cache_resize(value);
    if (new_size < 0) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "is smaller than page size");
        return;
    }

    s->xbzrle_cache_size = new_size;
}

int64_t qmp_query_migrate_cache_size(Error **errp)
{
    return migrate_xbzrle_cache_size();
}

void qmp_migrate_set_speed(int64_t value, Error **errp)
{
    MigrationState *s;

    if (value < 0) {
        value = 0;
    }
    if (value > SIZE_MAX) {
        value = SIZE_MAX;
    }

    s = migrate_get_current();
    s->bandwidth_limit = value;
    if (s->file) {
        qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO);
    }
}

void qmp_migrate_set_downtime(double value, Error **errp)
{
    value *= 1e9;
    value = MAX(0, MIN(UINT64_MAX, value));
    max_downtime = (uint64_t)value;
}

bool migrate_auto_converge(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}

bool migrate_zero_blocks(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}

bool migrate_use_compression(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
}

int migrate_compress_level(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
}

int migrate_compress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
}

int migrate_decompress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
}

bool migrate_use_events(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
}

int migrate_use_xbzrle(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}

int64_t migrate_xbzrle_cache_size(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->xbzrle_cache_size;
}

/* migration thread support */

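/*
 * The outgoing migration thread.  It iterates the savevm handlers while
 * honouring the bandwidth limit; once the estimated remaining data can be
 * sent within max_downtime it stops the VM, sends the final device state,
 * and leaves the cleanup to the bottom half scheduled on the main loop.
 */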
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    int64_t initial_bytes = 0;
    int64_t max_size = 0;
    int64_t start_time = initial_time;
    bool old_vm_running = false;

    rcu_register_thread();

    qemu_savevm_state_header(s->file);
    qemu_savevm_state_begin(s->file, &s->params);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);

    while (s->state == MIGRATION_STATUS_ACTIVE) {
        int64_t current_time;
        uint64_t pending_size;

        if (!qemu_file_rate_limit(s->file)) {
            pending_size = qemu_savevm_state_pending(s->file, max_size);
            trace_migrate_pending(pending_size, max_size);
            if (pending_size && pending_size >= max_size) {
                qemu_savevm_state_iterate(s->file);
            } else {
                int ret;

                qemu_mutex_lock_iothread();
                start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
                qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
                old_vm_running = runstate_is_running();

                ret = global_state_store();
                if (!ret) {
                    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
                    if (ret >= 0) {
                        qemu_file_set_rate_limit(s->file, INT64_MAX);
                        qemu_savevm_state_complete(s->file);
                    }
                }
                qemu_mutex_unlock_iothread();

                if (ret < 0) {
                    migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
                                      MIGRATION_STATUS_FAILED);
                    break;
                }

                if (!qemu_file_get_error(s->file)) {
                    migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
                                      MIGRATION_STATUS_COMPLETED);
                    break;
                }
            }
        }

        if (qemu_file_get_error(s->file)) {
            migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
                              MIGRATION_STATUS_FAILED);
            break;
        }
        current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        if (current_time >= initial_time + BUFFER_DELAY) {
            uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes;
            uint64_t time_spent = current_time - initial_time;
            double bandwidth = transferred_bytes / time_spent;
            max_size = bandwidth * migrate_max_downtime() / 1000000;

            s->mbps = time_spent ? (((double) transferred_bytes * 8.0) /
                    ((double) time_spent / 1000.0)) / 1000.0 / 1000.0 : -1;

            trace_migrate_transferred(transferred_bytes, time_spent,
                                      bandwidth, max_size);
            /* if we haven't sent anything, we don't want to recalculate
               10000 is a small enough number for our purposes */
            if (s->dirty_bytes_rate && transferred_bytes > 10000) {
                s->expected_downtime = s->dirty_bytes_rate / bandwidth;
            }

            qemu_file_reset_rate_limit(s->file);
            initial_time = current_time;
            initial_bytes = qemu_ftell(s->file);
        }
        if (qemu_file_rate_limit(s->file)) {
            /* usleep expects microseconds */
            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
        }
    }

    qemu_mutex_lock_iothread();
    if (s->state == MIGRATION_STATUS_COMPLETED) {
        int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        uint64_t transferred_bytes = qemu_ftell(s->file);
        s->total_time = end_time - s->total_time;
        s->downtime = end_time - start_time;
        if (s->total_time) {
            s->mbps = (((double) transferred_bytes * 8.0) /
                       ((double) s->total_time)) / 1000;
        }
        runstate_set(RUN_STATE_POSTMIGRATE);
    } else {
        if (old_vm_running) {
            vm_start();
        }
    }
    qemu_bh_schedule(s->cleanup_bh);
    qemu_mutex_unlock_iothread();

    rcu_unregister_thread();
    return NULL;
}

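/*
 * Called once the outgoing transport is set up: installs the cleanup bottom
 * half, applies the bandwidth limit, notifies state-change listeners, and
 * spawns the compression threads plus the migration thread itself.
 */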
void migrate_fd_connect(MigrationState *s)
{
    /* This is a best 1st approximation. ns to ms */
    s->expected_downtime = max_downtime/1000000;
    s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);

    qemu_file_set_rate_limit(s->file,
                             s->bandwidth_limit / XFER_LIMIT_RATIO);

    /* Notify before starting migration thread */
    notifier_list_notify(&migration_state_notifiers, s);

    migrate_compress_threads_create();
    qemu_thread_create(&s->thread, "migration", migration_thread, s,
                       QEMU_THREAD_JOINABLE);
}