qemu/migration/savevm.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2009-2015 Red Hat Inc
   6 *
   7 * Authors:
   8 *  Juan Quintela <quintela@redhat.com>
   9 *
  10 * Permission is hereby granted, free of charge, to any person obtaining a copy
  11 * of this software and associated documentation files (the "Software"), to deal
  12 * in the Software without restriction, including without limitation the rights
  13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 * copies of the Software, and to permit persons to whom the Software is
  15 * furnished to do so, subject to the following conditions:
  16 *
  17 * The above copyright notice and this permission notice shall be included in
  18 * all copies or substantial portions of the Software.
  19 *
  20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26 * THE SOFTWARE.
  27 */
  28
  29#include "qemu/osdep.h"
  30#include "hw/boards.h"
  31#include "hw/xen/xen.h"
  32#include "net/net.h"
  33#include "migration.h"
  34#include "migration/snapshot.h"
  35#include "migration/misc.h"
  36#include "migration/register.h"
  37#include "migration/global_state.h"
  38#include "ram.h"
  39#include "qemu-file-channel.h"
  40#include "qemu-file.h"
  41#include "savevm.h"
  42#include "postcopy-ram.h"
  43#include "qapi/error.h"
  44#include "qapi/qapi-commands-migration.h"
  45#include "qapi/qapi-commands-misc.h"
  46#include "qapi/qmp/qerror.h"
  47#include "qemu/error-report.h"
  48#include "sysemu/cpus.h"
  49#include "exec/memory.h"
  50#include "exec/target_page.h"
  51#include "trace.h"
  52#include "qemu/iov.h"
  53#include "block/snapshot.h"
  54#include "qemu/cutils.h"
  55#include "io/channel-buffer.h"
  56#include "io/channel-file.h"
  57#include "sysemu/replay.h"
  58#include "qjson.h"
  59#include "migration/colo.h"
  60#include "qemu/bitmap.h"
  61#include "net/announce.h"
  62
/* Exported constant, currently 0.
 * NOTE(review): judging by the name this is the format version of the
 * postcopy RAM discard message; its users are outside this view - confirm.
 */
const unsigned int postcopy_ram_discard_version = 0;
  64
  65/* Subcommands for QEMU_VM_COMMAND */
  66enum qemu_vm_cmd {
  67    MIG_CMD_INVALID = 0,   /* Must be 0 */
  68    MIG_CMD_OPEN_RETURN_PATH,  /* Tell the dest to open the Return path */
  69    MIG_CMD_PING,              /* Request a PONG on the RP */
  70
  71    MIG_CMD_POSTCOPY_ADVISE,       /* Prior to any page transfers, just
  72                                      warn we might want to do PC */
  73    MIG_CMD_POSTCOPY_LISTEN,       /* Start listening for incoming
  74                                      pages as it's running. */
  75    MIG_CMD_POSTCOPY_RUN,          /* Start execution */
  76
  77    MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
  78                                      were previously sent during
  79                                      precopy but are dirty. */
  80    MIG_CMD_PACKAGED,          /* Send a wrapped stream within this stream */
  81    MIG_CMD_ENABLE_COLO,       /* Enable COLO */
  82    MIG_CMD_POSTCOPY_RESUME,   /* resume postcopy on dest */
  83    MIG_CMD_RECV_BITMAP,       /* Request for recved bitmap on dst */
  84    MIG_CMD_MAX
  85};
  86
  87#define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
  88static struct mig_cmd_args {
  89    ssize_t     len; /* -1 = variable */
  90    const char *name;
  91} mig_cmd_args[] = {
  92    [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
  93    [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
  94    [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
  95    [MIG_CMD_POSTCOPY_ADVISE]  = { .len = -1, .name = "POSTCOPY_ADVISE" },
  96    [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
  97    [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
  98    [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
  99                                   .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
 100    [MIG_CMD_POSTCOPY_RESUME]  = { .len =  0, .name = "POSTCOPY_RESUME" },
 101    [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
 102    [MIG_CMD_RECV_BITMAP]      = { .len = -1, .name = "RECV_BITMAP" },
 103    [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
 104};
 105
/* Note for MIG_CMD_POSTCOPY_ADVISE:
 * The format of the arguments depends on the postcopy mode:
 * - postcopy RAM only
 *   uint64_t host page size
 *   uint64_t target page size
 *
 * - postcopy RAM and postcopy dirty bitmaps
 *   format is the same as for postcopy RAM only
 *
 * - postcopy dirty bitmaps only
 *   Nothing. Command length field is 0.
 *
 * Be careful: adding a new postcopy entity with some other parameters should
 * not break the format's ability to describe itself. A good way is to
 * introduce a generic extendable format, with an exception for the two old
 * entities.
 */
 122
 123/***********************************************************/
 124/* savevm/loadvm support */
 125
 126static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
 127                                   int64_t pos)
 128{
 129    int ret;
 130    QEMUIOVector qiov;
 131
 132    qemu_iovec_init_external(&qiov, iov, iovcnt);
 133    ret = bdrv_writev_vmstate(opaque, &qiov, pos);
 134    if (ret < 0) {
 135        return ret;
 136    }
 137
 138    return qiov.size;
 139}
 140
 141static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
 142                                size_t size)
 143{
 144    return bdrv_load_vmstate(opaque, buf, pos, size);
 145}
 146
 147static int bdrv_fclose(void *opaque)
 148{
 149    return bdrv_flush(opaque);
 150}
 151
 152static const QEMUFileOps bdrv_read_ops = {
 153    .get_buffer = block_get_buffer,
 154    .close =      bdrv_fclose
 155};
 156
 157static const QEMUFileOps bdrv_write_ops = {
 158    .writev_buffer  = block_writev_buffer,
 159    .close          = bdrv_fclose
 160};
 161
 162static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
 163{
 164    if (is_writable) {
 165        return qemu_fopen_ops(bs, &bdrv_write_ops);
 166    }
 167    return qemu_fopen_ops(bs, &bdrv_read_ops);
 168}
 169
 170
 171/* QEMUFile timer support.
 172 * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
 173 */
 174
 175void timer_put(QEMUFile *f, QEMUTimer *ts)
 176{
 177    uint64_t expire_time;
 178
 179    expire_time = timer_expire_time_ns(ts);
 180    qemu_put_be64(f, expire_time);
 181}
 182
 183void timer_get(QEMUFile *f, QEMUTimer *ts)
 184{
 185    uint64_t expire_time;
 186
 187    expire_time = qemu_get_be64(f);
 188    if (expire_time != -1) {
 189        timer_mod_ns(ts, expire_time);
 190    } else {
 191        timer_del(ts);
 192    }
 193}
 194
 195
 196/* VMState timer support.
 197 * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
 198 */
 199
 200static int get_timer(QEMUFile *f, void *pv, size_t size,
 201                     const VMStateField *field)
 202{
 203    QEMUTimer *v = pv;
 204    timer_get(f, v);
 205    return 0;
 206}
 207
 208static int put_timer(QEMUFile *f, void *pv, size_t size,
 209                     const VMStateField *field, QJSON *vmdesc)
 210{
 211    QEMUTimer *v = pv;
 212    timer_put(f, v);
 213
 214    return 0;
 215}
 216
 217const VMStateInfo vmstate_info_timer = {
 218    .name = "timer",
 219    .get  = get_timer,
 220    .put  = put_timer,
 221};
 222
 223
/*
 * Alternative identification of a SaveStateEntry that was registered with a
 * device path: the registration functions store here the id string without
 * the path prefix, plus the instance id that goes with it.
 */
typedef struct CompatEntry {
    char idstr[256];    /* idstr / vmsd name without the device path */
    int instance_id;    /* caller-supplied, or computed when -1 was passed */
} CompatEntry;
 228
/*
 * One registered piece of savable state, linked into
 * savevm_state.handlers.  An entry is described either by a
 * VMStateDescription (vmsd) or by old-style SaveVMHandlers (ops).
 */
typedef struct SaveStateEntry {
    QTAILQ_ENTRY(SaveStateEntry) entry;
    /* "<device path>/<name>" when a device path is available, else "<name>" */
    char idstr[256];
    int instance_id;
    int alias_id;
    int version_id;
    /* version id read from the stream */
    int load_version_id;
    /* assigned from savevm_state.global_section_id at registration */
    int section_id;
    /* section id read from the stream */
    int load_section_id;
    const SaveVMHandlers *ops;
    const VMStateDescription *vmsd;
    void *opaque;
    /* only allocated when the entry was registered with a device path */
    CompatEntry *compat;
    /* set to 1 by register_savevm_live() when ops->save_setup is provided */
    int is_ram;
} SaveStateEntry;
 246
/*
 * Global save/load state: the list of registered handlers plus the fields
 * carried by the "configuration" section.
 */
typedef struct SaveState {
    QTAILQ_HEAD(, SaveStateEntry) handlers;
    /* next section id to hand out; incremented on every registration */
    int global_section_id;
    /* machine type name and its length (name is not NUL-terminated on load) */
    uint32_t len;
    const char *name;
    uint32_t target_page_bits;
    /* capabilities that are validated between source and destination */
    uint32_t caps_count;
    MigrationCapability *capabilities;
} SaveState;
 256
/* The single SaveState instance: empty handler list, section ids from 0. */
static SaveState savevm_state = {
    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
    .global_section_id = 0,
};
 261
 262static bool should_validate_capability(int capability)
 263{
 264    assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
 265    /* Validate only new capabilities to keep compatibility. */
 266    switch (capability) {
 267    case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
 268        return true;
 269    default:
 270        return false;
 271    }
 272}
 273
 274static uint32_t get_validatable_capabilities_count(void)
 275{
 276    MigrationState *s = migrate_get_current();
 277    uint32_t result = 0;
 278    int i;
 279    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
 280        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
 281            result++;
 282        }
 283    }
 284    return result;
 285}
 286
 287static int configuration_pre_save(void *opaque)
 288{
 289    SaveState *state = opaque;
 290    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
 291    MigrationState *s = migrate_get_current();
 292    int i, j;
 293
 294    state->len = strlen(current_name);
 295    state->name = current_name;
 296    state->target_page_bits = qemu_target_page_bits();
 297
 298    state->caps_count = get_validatable_capabilities_count();
 299    state->capabilities = g_renew(MigrationCapability, state->capabilities,
 300                                  state->caps_count);
 301    for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
 302        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
 303            state->capabilities[j++] = i;
 304        }
 305    }
 306
 307    return 0;
 308}
 309
 310static int configuration_pre_load(void *opaque)
 311{
 312    SaveState *state = opaque;
 313
 314    /* If there is no target-page-bits subsection it means the source
 315     * predates the variable-target-page-bits support and is using the
 316     * minimum possible value for this CPU.
 317     */
 318    state->target_page_bits = qemu_target_page_bits_min();
 319    return 0;
 320}
 321
 322static bool configuration_validate_capabilities(SaveState *state)
 323{
 324    bool ret = true;
 325    MigrationState *s = migrate_get_current();
 326    unsigned long *source_caps_bm;
 327    int i;
 328
 329    source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
 330    for (i = 0; i < state->caps_count; i++) {
 331        MigrationCapability capability = state->capabilities[i];
 332        set_bit(capability, source_caps_bm);
 333    }
 334
 335    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
 336        bool source_state, target_state;
 337        if (!should_validate_capability(i)) {
 338            continue;
 339        }
 340        source_state = test_bit(i, source_caps_bm);
 341        target_state = s->enabled_capabilities[i];
 342        if (source_state != target_state) {
 343            error_report("Capability %s is %s, but received capability is %s",
 344                         MigrationCapability_str(i),
 345                         target_state ? "on" : "off",
 346                         source_state ? "on" : "off");
 347            ret = false;
 348            /* Don't break here to report all failed capabilities */
 349        }
 350    }
 351
 352    g_free(source_caps_bm);
 353    return ret;
 354}
 355
 356static int configuration_post_load(void *opaque, int version_id)
 357{
 358    SaveState *state = opaque;
 359    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
 360
 361    if (strncmp(state->name, current_name, state->len) != 0) {
 362        error_report("Machine type received is '%.*s' and local is '%s'",
 363                     (int) state->len, state->name, current_name);
 364        return -EINVAL;
 365    }
 366
 367    if (state->target_page_bits != qemu_target_page_bits()) {
 368        error_report("Received TARGET_PAGE_BITS is %d but local is %d",
 369                     state->target_page_bits, qemu_target_page_bits());
 370        return -EINVAL;
 371    }
 372
 373    if (!configuration_validate_capabilities(state)) {
 374        return -EINVAL;
 375    }
 376
 377    return 0;
 378}
 379
 380static int get_capability(QEMUFile *f, void *pv, size_t size,
 381                          const VMStateField *field)
 382{
 383    MigrationCapability *capability = pv;
 384    char capability_str[UINT8_MAX + 1];
 385    uint8_t len;
 386    int i;
 387
 388    len = qemu_get_byte(f);
 389    qemu_get_buffer(f, (uint8_t *)capability_str, len);
 390    capability_str[len] = '\0';
 391    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
 392        if (!strcmp(MigrationCapability_str(i), capability_str)) {
 393            *capability = i;
 394            return 0;
 395        }
 396    }
 397    error_report("Received unknown capability %s", capability_str);
 398    return -EINVAL;
 399}
 400
 401static int put_capability(QEMUFile *f, void *pv, size_t size,
 402                          const VMStateField *field, QJSON *vmdesc)
 403{
 404    MigrationCapability *capability = pv;
 405    const char *capability_str = MigrationCapability_str(*capability);
 406    size_t len = strlen(capability_str);
 407    assert(len <= UINT8_MAX);
 408
 409    qemu_put_byte(f, len);
 410    qemu_put_buffer(f, (uint8_t *)capability_str, len);
 411    return 0;
 412}
 413
 414static const VMStateInfo vmstate_info_capability = {
 415    .name = "capability",
 416    .get  = get_capability,
 417    .put  = put_capability,
 418};
 419
 420/* The target-page-bits subsection is present only if the
 421 * target page size is not the same as the default (ie the
 422 * minimum page size for a variable-page-size guest CPU).
 423 * If it is present then it contains the actual target page
 424 * bits for the machine, and migration will fail if the
 425 * two ends don't agree about it.
 426 */
 427static bool vmstate_target_page_bits_needed(void *opaque)
 428{
 429    return qemu_target_page_bits()
 430        > qemu_target_page_bits_min();
 431}
 432
/*
 * Optional subsection of "configuration" holding SaveState.target_page_bits;
 * sent only when vmstate_target_page_bits_needed() returns true.
 */
static const VMStateDescription vmstate_target_page_bits = {
    .name = "configuration/target-page-bits",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_target_page_bits_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(target_page_bits, SaveState),
        VMSTATE_END_OF_LIST()
    }
};
 443
 444static bool vmstate_capabilites_needed(void *opaque)
 445{
 446    return get_validatable_capabilities_count() > 0;
 447}
 448
/*
 * Optional subsection of "configuration" holding the list of validated
 * capabilities: a count followed by caps_count entries, each transferred
 * with vmstate_info_capability.  Sent only when
 * vmstate_capabilites_needed() returns true.
 */
static const VMStateDescription vmstate_capabilites = {
    .name = "configuration/capabilities",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_capabilites_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_V(caps_count, SaveState, 1),
        VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
                                    vmstate_info_capability,
                                    MigrationCapability),
        VMSTATE_END_OF_LIST()
    }
};
 462
/*
 * Description of the "configuration" section: the machine type name (length
 * followed by the name bytes) plus the optional target-page-bits and
 * capabilities subsections.  The pre_save/pre_load/post_load hooks fill in
 * and validate the SaveState fields.
 */
static const VMStateDescription vmstate_configuration = {
    .name = "configuration",
    .version_id = 1,
    .pre_load = configuration_pre_load,
    .post_load = configuration_post_load,
    .pre_save = configuration_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(len, SaveState),
        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_target_page_bits,
        &vmstate_capabilites,
        NULL
    }
};
 480
/*
 * Forward declaration: dump_vmstate_vmsf() and dump_vmstate_vmsd() call
 * each other recursively.
 */
static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection);
 484
 485static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
 486                              int indent)
 487{
 488    fprintf(out_file, "%*s{\n", indent, "");
 489    indent += 2;
 490    fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
 491    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
 492            field->version_id);
 493    fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
 494            field->field_exists ? "true" : "false");
 495    fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
 496    if (field->vmsd != NULL) {
 497        fprintf(out_file, ",\n");
 498        dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
 499    }
 500    fprintf(out_file, "\n%*s}", indent - 2, "");
 501}
 502
 503static void dump_vmstate_vmss(FILE *out_file,
 504                              const VMStateDescription **subsection,
 505                              int indent)
 506{
 507    if (*subsection != NULL) {
 508        dump_vmstate_vmsd(out_file, *subsection, indent, true);
 509    }
 510}
 511
 512static void dump_vmstate_vmsd(FILE *out_file,
 513                              const VMStateDescription *vmsd, int indent,
 514                              bool is_subsection)
 515{
 516    if (is_subsection) {
 517        fprintf(out_file, "%*s{\n", indent, "");
 518    } else {
 519        fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
 520    }
 521    indent += 2;
 522    fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
 523    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
 524            vmsd->version_id);
 525    fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
 526            vmsd->minimum_version_id);
 527    if (vmsd->fields != NULL) {
 528        const VMStateField *field = vmsd->fields;
 529        bool first;
 530
 531        fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
 532        first = true;
 533        while (field->name != NULL) {
 534            if (field->flags & VMS_MUST_EXIST) {
 535                /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
 536                field++;
 537                continue;
 538            }
 539            if (!first) {
 540                fprintf(out_file, ",\n");
 541            }
 542            dump_vmstate_vmsf(out_file, field, indent + 2);
 543            field++;
 544            first = false;
 545        }
 546        fprintf(out_file, "\n%*s]", indent, "");
 547    }
 548    if (vmsd->subsections != NULL) {
 549        const VMStateDescription **subsection = vmsd->subsections;
 550        bool first;
 551
 552        fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
 553        first = true;
 554        while (*subsection != NULL) {
 555            if (!first) {
 556                fprintf(out_file, ",\n");
 557            }
 558            dump_vmstate_vmss(out_file, subsection, indent + 2);
 559            subsection++;
 560            first = false;
 561        }
 562        fprintf(out_file, "\n%*s]", indent, "");
 563    }
 564    fprintf(out_file, "\n%*s}", indent - 2, "");
 565}
 566
 567static void dump_machine_type(FILE *out_file)
 568{
 569    MachineClass *mc;
 570
 571    mc = MACHINE_GET_CLASS(current_machine);
 572
 573    fprintf(out_file, "  \"vmschkmachine\": {\n");
 574    fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
 575    fprintf(out_file, "  },\n");
 576}
 577
 578void dump_vmstate_json_to_file(FILE *out_file)
 579{
 580    GSList *list, *elt;
 581    bool first;
 582
 583    fprintf(out_file, "{\n");
 584    dump_machine_type(out_file);
 585
 586    first = true;
 587    list = object_class_get_list(TYPE_DEVICE, true);
 588    for (elt = list; elt; elt = elt->next) {
 589        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
 590                                             TYPE_DEVICE);
 591        const char *name;
 592        int indent = 2;
 593
 594        if (!dc->vmsd) {
 595            continue;
 596        }
 597
 598        if (!first) {
 599            fprintf(out_file, ",\n");
 600        }
 601        name = object_class_get_name(OBJECT_CLASS(dc));
 602        fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
 603        indent += 2;
 604        fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
 605        fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
 606                dc->vmsd->version_id);
 607        fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
 608                dc->vmsd->minimum_version_id);
 609
 610        dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);
 611
 612        fprintf(out_file, "\n%*s}", indent - 2, "");
 613        first = false;
 614    }
 615    fprintf(out_file, "\n}\n");
 616    fclose(out_file);
 617}
 618
 619static int calculate_new_instance_id(const char *idstr)
 620{
 621    SaveStateEntry *se;
 622    int instance_id = 0;
 623
 624    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 625        if (strcmp(idstr, se->idstr) == 0
 626            && instance_id <= se->instance_id) {
 627            instance_id = se->instance_id + 1;
 628        }
 629    }
 630    return instance_id;
 631}
 632
 633static int calculate_compat_instance_id(const char *idstr)
 634{
 635    SaveStateEntry *se;
 636    int instance_id = 0;
 637
 638    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 639        if (!se->compat) {
 640            continue;
 641        }
 642
 643        if (strcmp(idstr, se->compat->idstr) == 0
 644            && instance_id <= se->compat->instance_id) {
 645            instance_id = se->compat->instance_id + 1;
 646        }
 647    }
 648    return instance_id;
 649}
 650
 651static inline MigrationPriority save_state_priority(SaveStateEntry *se)
 652{
 653    if (se->vmsd) {
 654        return se->vmsd->priority;
 655    }
 656    return MIG_PRI_DEFAULT;
 657}
 658
 659static void savevm_state_handler_insert(SaveStateEntry *nse)
 660{
 661    MigrationPriority priority = save_state_priority(nse);
 662    SaveStateEntry *se;
 663
 664    assert(priority <= MIG_PRI_MAX);
 665
 666    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 667        if (save_state_priority(se) < priority) {
 668            break;
 669        }
 670    }
 671
 672    if (se) {
 673        QTAILQ_INSERT_BEFORE(se, nse, entry);
 674    } else {
 675        QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
 676    }
 677}
 678
 679/* TODO: Individual devices generally have very little idea about the rest
 680   of the system, so instance_id should be removed/replaced.
 681   Meanwhile pass -1 as instance_id if you do not already have a clearly
 682   distinguishing id for all instances of your device class. */
 683int register_savevm_live(DeviceState *dev,
 684                         const char *idstr,
 685                         int instance_id,
 686                         int version_id,
 687                         const SaveVMHandlers *ops,
 688                         void *opaque)
 689{
 690    SaveStateEntry *se;
 691
 692    se = g_new0(SaveStateEntry, 1);
 693    se->version_id = version_id;
 694    se->section_id = savevm_state.global_section_id++;
 695    se->ops = ops;
 696    se->opaque = opaque;
 697    se->vmsd = NULL;
 698    /* if this is a live_savem then set is_ram */
 699    if (ops->save_setup != NULL) {
 700        se->is_ram = 1;
 701    }
 702
 703    if (dev) {
 704        char *id = qdev_get_dev_path(dev);
 705        if (id) {
 706            if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
 707                sizeof(se->idstr)) {
 708                error_report("Path too long for VMState (%s)", id);
 709                g_free(id);
 710                g_free(se);
 711
 712                return -1;
 713            }
 714            g_free(id);
 715
 716            se->compat = g_new0(CompatEntry, 1);
 717            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr);
 718            se->compat->instance_id = instance_id == -1 ?
 719                         calculate_compat_instance_id(idstr) : instance_id;
 720            instance_id = -1;
 721        }
 722    }
 723    pstrcat(se->idstr, sizeof(se->idstr), idstr);
 724
 725    if (instance_id == -1) {
 726        se->instance_id = calculate_new_instance_id(se->idstr);
 727    } else {
 728        se->instance_id = instance_id;
 729    }
 730    assert(!se->compat || se->instance_id == 0);
 731    savevm_state_handler_insert(se);
 732    return 0;
 733}
 734
 735void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
 736{
 737    SaveStateEntry *se, *new_se;
 738    char id[256] = "";
 739
 740    if (dev) {
 741        char *path = qdev_get_dev_path(dev);
 742        if (path) {
 743            pstrcpy(id, sizeof(id), path);
 744            pstrcat(id, sizeof(id), "/");
 745            g_free(path);
 746        }
 747    }
 748    pstrcat(id, sizeof(id), idstr);
 749
 750    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
 751        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
 752            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
 753            g_free(se->compat);
 754            g_free(se);
 755        }
 756    }
 757}
 758
 759int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
 760                                   const VMStateDescription *vmsd,
 761                                   void *opaque, int alias_id,
 762                                   int required_for_version,
 763                                   Error **errp)
 764{
 765    SaveStateEntry *se;
 766
 767    /* If this triggers, alias support can be dropped for the vmsd. */
 768    assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
 769
 770    se = g_new0(SaveStateEntry, 1);
 771    se->version_id = vmsd->version_id;
 772    se->section_id = savevm_state.global_section_id++;
 773    se->opaque = opaque;
 774    se->vmsd = vmsd;
 775    se->alias_id = alias_id;
 776
 777    if (dev) {
 778        char *id = qdev_get_dev_path(dev);
 779        if (id) {
 780            if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
 781                sizeof(se->idstr)) {
 782                error_setg(errp, "Path too long for VMState (%s)", id);
 783                g_free(id);
 784                g_free(se);
 785
 786                return -1;
 787            }
 788            g_free(id);
 789
 790            se->compat = g_new0(CompatEntry, 1);
 791            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
 792            se->compat->instance_id = instance_id == -1 ?
 793                         calculate_compat_instance_id(vmsd->name) : instance_id;
 794            instance_id = -1;
 795        }
 796    }
 797    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
 798
 799    if (instance_id == -1) {
 800        se->instance_id = calculate_new_instance_id(se->idstr);
 801    } else {
 802        se->instance_id = instance_id;
 803    }
 804    assert(!se->compat || se->instance_id == 0);
 805    savevm_state_handler_insert(se);
 806    return 0;
 807}
 808
 809void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
 810                        void *opaque)
 811{
 812    SaveStateEntry *se, *new_se;
 813
 814    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
 815        if (se->vmsd == vmsd && se->opaque == opaque) {
 816            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
 817            g_free(se->compat);
 818            g_free(se);
 819        }
 820    }
 821}
 822
 823static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
 824{
 825    trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
 826    if (!se->vmsd) {         /* Old style */
 827        return se->ops->load_state(f, se->opaque, se->load_version_id);
 828    }
 829    return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
 830}
 831
 832static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
 833{
 834    int64_t old_offset, size;
 835
 836    old_offset = qemu_ftell_fast(f);
 837    se->ops->save_state(f, se->opaque);
 838    size = qemu_ftell_fast(f) - old_offset;
 839
 840    if (vmdesc) {
 841        json_prop_int(vmdesc, "size", size);
 842        json_start_array(vmdesc, "fields");
 843        json_start_object(vmdesc, NULL);
 844        json_prop_str(vmdesc, "name", "data");
 845        json_prop_int(vmdesc, "size", size);
 846        json_prop_str(vmdesc, "type", "buffer");
 847        json_end_object(vmdesc);
 848        json_end_array(vmdesc);
 849    }
 850}
 851
 852static int vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
 853{
 854    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
 855    if (!se->vmsd) {
 856        vmstate_save_old_style(f, se, vmdesc);
 857        return 0;
 858    }
 859    return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 860}
 861
 862/*
 863 * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
 864 */
 865static void save_section_header(QEMUFile *f, SaveStateEntry *se,
 866                                uint8_t section_type)
 867{
 868    qemu_put_byte(f, section_type);
 869    qemu_put_be32(f, se->section_id);
 870
 871    if (section_type == QEMU_VM_SECTION_FULL ||
 872        section_type == QEMU_VM_SECTION_START) {
 873        /* ID string */
 874        size_t len = strlen(se->idstr);
 875        qemu_put_byte(f, len);
 876        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
 877
 878        qemu_put_be32(f, se->instance_id);
 879        qemu_put_be32(f, se->version_id);
 880    }
 881}
 882
 883/*
 884 * Write a footer onto device sections that catches cases misformatted device
 885 * sections.
 886 */
 887static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
 888{
 889    if (migrate_get_current()->send_section_footer) {
 890        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
 891        qemu_put_be32(f, se->section_id);
 892    }
 893}
 894
 895/**
 896 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
 897 *                           command and associated data.
 898 *
 899 * @f: File to send command on
 900 * @command: Command type to send
 901 * @len: Length of associated data
 902 * @data: Data associated with command.
 903 */
 904static void qemu_savevm_command_send(QEMUFile *f,
 905                                     enum qemu_vm_cmd command,
 906                                     uint16_t len,
 907                                     uint8_t *data)
 908{
 909    trace_savevm_command_send(command, len);
 910    qemu_put_byte(f, QEMU_VM_COMMAND);
 911    qemu_put_be16(f, (uint16_t)command);
 912    qemu_put_be16(f, len);
 913    qemu_put_buffer(f, data, len);
 914    qemu_fflush(f);
 915}
 916
 917void qemu_savevm_send_colo_enable(QEMUFile *f)
 918{
 919    trace_savevm_send_colo_enable();
 920    qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
 921}
 922
 923void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
 924{
 925    uint32_t buf;
 926
 927    trace_savevm_send_ping(value);
 928    buf = cpu_to_be32(value);
 929    qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
 930}
 931
 932void qemu_savevm_send_open_return_path(QEMUFile *f)
 933{
 934    trace_savevm_send_open_return_path();
 935    qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
 936}
 937
 938/* We have a buffer of data to send; we don't want that all to be loaded
 939 * by the command itself, so the command contains just the length of the
 940 * extra buffer that we then send straight after it.
 941 * TODO: Must be a better way to organise that
 942 *
 943 * Returns:
 944 *    0 on success
 945 *    -ve on error
 946 */
 947int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
 948{
 949    uint32_t tmp;
 950
 951    if (len > MAX_VM_CMD_PACKAGED_SIZE) {
 952        error_report("%s: Unreasonably large packaged state: %zu",
 953                     __func__, len);
 954        return -1;
 955    }
 956
 957    tmp = cpu_to_be32(len);
 958
 959    trace_qemu_savevm_send_packaged();
 960    qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
 961
 962    qemu_put_buffer(f, buf, len);
 963
 964    return 0;
 965}
 966
 967/* Send prior to any postcopy transfer */
 968void qemu_savevm_send_postcopy_advise(QEMUFile *f)
 969{
 970    if (migrate_postcopy_ram()) {
 971        uint64_t tmp[2];
 972        tmp[0] = cpu_to_be64(ram_pagesize_summary());
 973        tmp[1] = cpu_to_be64(qemu_target_page_size());
 974
 975        trace_qemu_savevm_send_postcopy_advise();
 976        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
 977                                 16, (uint8_t *)tmp);
 978    } else {
 979        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
 980    }
 981}
 982
 983/* Sent prior to starting the destination running in postcopy, discard pages
 984 * that have already been sent but redirtied on the source.
 985 * CMD_POSTCOPY_RAM_DISCARD consist of:
 986 *      byte   version (0)
 987 *      byte   Length of name field (not including 0)
 988 *  n x byte   RAM block name
 989 *      byte   0 terminator (just for safety)
 990 *  n x        Byte ranges within the named RAMBlock
 991 *      be64   Start of the range
 992 *      be64   Length
 993 *
 994 *  name:  RAMBlock name that these entries are part of
 995 *  len: Number of page entries
 996 *  start_list: 'len' addresses
 997 *  length_list: 'len' addresses
 998 *
 999 */
1000void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
1001                                           uint16_t len,
1002                                           uint64_t *start_list,
1003                                           uint64_t *length_list)
1004{
1005    uint8_t *buf;
1006    uint16_t tmplen;
1007    uint16_t t;
1008    size_t name_len = strlen(name);
1009
1010    trace_qemu_savevm_send_postcopy_ram_discard(name, len);
1011    assert(name_len < 256);
1012    buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
1013    buf[0] = postcopy_ram_discard_version;
1014    buf[1] = name_len;
1015    memcpy(buf + 2, name, name_len);
1016    tmplen = 2 + name_len;
1017    buf[tmplen++] = '\0';
1018
1019    for (t = 0; t < len; t++) {
1020        stq_be_p(buf + tmplen, start_list[t]);
1021        tmplen += 8;
1022        stq_be_p(buf + tmplen, length_list[t]);
1023        tmplen += 8;
1024    }
1025    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
1026    g_free(buf);
1027}
1028
1029/* Get the destination into a state where it can receive postcopy data. */
1030void qemu_savevm_send_postcopy_listen(QEMUFile *f)
1031{
1032    trace_savevm_send_postcopy_listen();
1033    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
1034}
1035
1036/* Kick the destination into running */
1037void qemu_savevm_send_postcopy_run(QEMUFile *f)
1038{
1039    trace_savevm_send_postcopy_run();
1040    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
1041}
1042
1043void qemu_savevm_send_postcopy_resume(QEMUFile *f)
1044{
1045    trace_savevm_send_postcopy_resume();
1046    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
1047}
1048
1049void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
1050{
1051    size_t len;
1052    char buf[256];
1053
1054    trace_savevm_send_recv_bitmap(block_name);
1055
1056    buf[0] = len = strlen(block_name);
1057    memcpy(buf + 1, block_name, len);
1058
1059    qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
1060}
1061
1062bool qemu_savevm_state_blocked(Error **errp)
1063{
1064    SaveStateEntry *se;
1065
1066    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1067        if (se->vmsd && se->vmsd->unmigratable) {
1068            error_setg(errp, "State blocked by non-migratable device '%s'",
1069                       se->idstr);
1070            return true;
1071        }
1072    }
1073    return false;
1074}
1075
1076void qemu_savevm_state_header(QEMUFile *f)
1077{
1078    trace_savevm_state_header();
1079    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1080    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1081
1082    if (migrate_get_current()->send_configuration) {
1083        qemu_put_byte(f, QEMU_VM_CONFIGURATION);
1084        vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
1085    }
1086}
1087
1088void qemu_savevm_state_setup(QEMUFile *f)
1089{
1090    SaveStateEntry *se;
1091    Error *local_err = NULL;
1092    int ret;
1093
1094    trace_savevm_state_setup();
1095    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1096        if (!se->ops || !se->ops->save_setup) {
1097            continue;
1098        }
1099        if (se->ops && se->ops->is_active) {
1100            if (!se->ops->is_active(se->opaque)) {
1101                continue;
1102            }
1103        }
1104        save_section_header(f, se, QEMU_VM_SECTION_START);
1105
1106        ret = se->ops->save_setup(f, se->opaque);
1107        save_section_footer(f, se);
1108        if (ret < 0) {
1109            qemu_file_set_error(f, ret);
1110            break;
1111        }
1112    }
1113
1114    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
1115        error_report_err(local_err);
1116    }
1117}
1118
1119int qemu_savevm_state_resume_prepare(MigrationState *s)
1120{
1121    SaveStateEntry *se;
1122    int ret;
1123
1124    trace_savevm_state_resume_prepare();
1125
1126    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1127        if (!se->ops || !se->ops->resume_prepare) {
1128            continue;
1129        }
1130        if (se->ops && se->ops->is_active) {
1131            if (!se->ops->is_active(se->opaque)) {
1132                continue;
1133            }
1134        }
1135        ret = se->ops->resume_prepare(s, se->opaque);
1136        if (ret < 0) {
1137            return ret;
1138        }
1139    }
1140
1141    return 0;
1142}
1143
1144/*
1145 * this function has three return values:
1146 *   negative: there was one error, and we have -errno.
1147 *   0 : We haven't finished, caller have to go again
1148 *   1 : We have finished, we can go to complete phase
1149 */
1150int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
1151{
1152    SaveStateEntry *se;
1153    int ret = 1;
1154
1155    trace_savevm_state_iterate();
1156    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1157        if (!se->ops || !se->ops->save_live_iterate) {
1158            continue;
1159        }
1160        if (se->ops && se->ops->is_active) {
1161            if (!se->ops->is_active(se->opaque)) {
1162                continue;
1163            }
1164        }
1165        if (se->ops && se->ops->is_active_iterate) {
1166            if (!se->ops->is_active_iterate(se->opaque)) {
1167                continue;
1168            }
1169        }
1170        /*
1171         * In the postcopy phase, any device that doesn't know how to
1172         * do postcopy should have saved it's state in the _complete
1173         * call that's already run, it might get confused if we call
1174         * iterate afterwards.
1175         */
1176        if (postcopy &&
1177            !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
1178            continue;
1179        }
1180        if (qemu_file_rate_limit(f)) {
1181            return 0;
1182        }
1183        trace_savevm_section_start(se->idstr, se->section_id);
1184
1185        save_section_header(f, se, QEMU_VM_SECTION_PART);
1186
1187        ret = se->ops->save_live_iterate(f, se->opaque);
1188        trace_savevm_section_end(se->idstr, se->section_id, ret);
1189        save_section_footer(f, se);
1190
1191        if (ret < 0) {
1192            qemu_file_set_error(f, ret);
1193        }
1194        if (ret <= 0) {
1195            /* Do not proceed to the next vmstate before this one reported
1196               completion of the current stage. This serializes the migration
1197               and reduces the probability that a faster changing state is
1198               synchronized over and over again. */
1199            break;
1200        }
1201    }
1202    return ret;
1203}
1204
1205static bool should_send_vmdesc(void)
1206{
1207    MachineState *machine = MACHINE(qdev_get_machine());
1208    bool in_postcopy = migration_in_postcopy();
1209    return !machine->suppress_vmdesc && !in_postcopy;
1210}
1211
1212/*
1213 * Calls the save_live_complete_postcopy methods
1214 * causing the last few pages to be sent immediately and doing any associated
1215 * cleanup.
1216 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
1217 * all the other devices, but that happens at the point we switch to postcopy.
1218 */
1219void qemu_savevm_state_complete_postcopy(QEMUFile *f)
1220{
1221    SaveStateEntry *se;
1222    int ret;
1223
1224    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1225        if (!se->ops || !se->ops->save_live_complete_postcopy) {
1226            continue;
1227        }
1228        if (se->ops && se->ops->is_active) {
1229            if (!se->ops->is_active(se->opaque)) {
1230                continue;
1231            }
1232        }
1233        trace_savevm_section_start(se->idstr, se->section_id);
1234        /* Section type */
1235        qemu_put_byte(f, QEMU_VM_SECTION_END);
1236        qemu_put_be32(f, se->section_id);
1237
1238        ret = se->ops->save_live_complete_postcopy(f, se->opaque);
1239        trace_savevm_section_end(se->idstr, se->section_id, ret);
1240        save_section_footer(f, se);
1241        if (ret < 0) {
1242            qemu_file_set_error(f, ret);
1243            return;
1244        }
1245    }
1246
1247    qemu_put_byte(f, QEMU_VM_EOF);
1248    qemu_fflush(f);
1249}
1250
1251int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
1252                                       bool inactivate_disks)
1253{
1254    QJSON *vmdesc;
1255    int vmdesc_len;
1256    SaveStateEntry *se;
1257    int ret;
1258    bool in_postcopy = migration_in_postcopy();
1259    Error *local_err = NULL;
1260
1261    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
1262        error_report_err(local_err);
1263    }
1264
1265    trace_savevm_state_complete_precopy();
1266
1267    cpu_synchronize_all_states();
1268
1269    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1270        if (!se->ops ||
1271            (in_postcopy && se->ops->has_postcopy &&
1272             se->ops->has_postcopy(se->opaque)) ||
1273            (in_postcopy && !iterable_only) ||
1274            !se->ops->save_live_complete_precopy) {
1275            continue;
1276        }
1277
1278        if (se->ops && se->ops->is_active) {
1279            if (!se->ops->is_active(se->opaque)) {
1280                continue;
1281            }
1282        }
1283        trace_savevm_section_start(se->idstr, se->section_id);
1284
1285        save_section_header(f, se, QEMU_VM_SECTION_END);
1286
1287        ret = se->ops->save_live_complete_precopy(f, se->opaque);
1288        trace_savevm_section_end(se->idstr, se->section_id, ret);
1289        save_section_footer(f, se);
1290        if (ret < 0) {
1291            qemu_file_set_error(f, ret);
1292            return -1;
1293        }
1294    }
1295
1296    if (iterable_only) {
1297        return 0;
1298    }
1299
1300    vmdesc = qjson_new();
1301    json_prop_int(vmdesc, "page_size", qemu_target_page_size());
1302    json_start_array(vmdesc, "devices");
1303    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1304
1305        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1306            continue;
1307        }
1308        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1309            trace_savevm_section_skip(se->idstr, se->section_id);
1310            continue;
1311        }
1312
1313        trace_savevm_section_start(se->idstr, se->section_id);
1314
1315        json_start_object(vmdesc, NULL);
1316        json_prop_str(vmdesc, "name", se->idstr);
1317        json_prop_int(vmdesc, "instance_id", se->instance_id);
1318
1319        save_section_header(f, se, QEMU_VM_SECTION_FULL);
1320        ret = vmstate_save(f, se, vmdesc);
1321        if (ret) {
1322            qemu_file_set_error(f, ret);
1323            return ret;
1324        }
1325        trace_savevm_section_end(se->idstr, se->section_id, 0);
1326        save_section_footer(f, se);
1327
1328        json_end_object(vmdesc);
1329    }
1330
1331    if (inactivate_disks) {
1332        /* Inactivate before sending QEMU_VM_EOF so that the
1333         * bdrv_invalidate_cache_all() on the other end won't fail. */
1334        ret = bdrv_inactivate_all();
1335        if (ret) {
1336            error_report("%s: bdrv_inactivate_all() failed (%d)",
1337                         __func__, ret);
1338            qemu_file_set_error(f, ret);
1339            return ret;
1340        }
1341    }
1342    if (!in_postcopy) {
1343        /* Postcopy stream will still be going */
1344        qemu_put_byte(f, QEMU_VM_EOF);
1345    }
1346
1347    json_end_array(vmdesc);
1348    qjson_finish(vmdesc);
1349    vmdesc_len = strlen(qjson_get_str(vmdesc));
1350
1351    if (should_send_vmdesc()) {
1352        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
1353        qemu_put_be32(f, vmdesc_len);
1354        qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
1355    }
1356    qjson_destroy(vmdesc);
1357
1358    qemu_fflush(f);
1359    return 0;
1360}
1361
1362/* Give an estimate of the amount left to be transferred,
1363 * the result is split into the amount for units that can and
1364 * for units that can't do postcopy.
1365 */
1366void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
1367                               uint64_t *res_precopy_only,
1368                               uint64_t *res_compatible,
1369                               uint64_t *res_postcopy_only)
1370{
1371    SaveStateEntry *se;
1372
1373    *res_precopy_only = 0;
1374    *res_compatible = 0;
1375    *res_postcopy_only = 0;
1376
1377
1378    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1379        if (!se->ops || !se->ops->save_live_pending) {
1380            continue;
1381        }
1382        if (se->ops && se->ops->is_active) {
1383            if (!se->ops->is_active(se->opaque)) {
1384                continue;
1385            }
1386        }
1387        se->ops->save_live_pending(f, se->opaque, threshold_size,
1388                                   res_precopy_only, res_compatible,
1389                                   res_postcopy_only);
1390    }
1391}
1392
1393void qemu_savevm_state_cleanup(void)
1394{
1395    SaveStateEntry *se;
1396    Error *local_err = NULL;
1397
1398    if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
1399        error_report_err(local_err);
1400    }
1401
1402    trace_savevm_state_cleanup();
1403    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1404        if (se->ops && se->ops->save_cleanup) {
1405            se->ops->save_cleanup(se->opaque);
1406        }
1407    }
1408}
1409
1410static int qemu_savevm_state(QEMUFile *f, Error **errp)
1411{
1412    int ret;
1413    MigrationState *ms = migrate_get_current();
1414    MigrationStatus status;
1415
1416    if (migration_is_setup_or_active(ms->state) ||
1417        ms->state == MIGRATION_STATUS_CANCELLING ||
1418        ms->state == MIGRATION_STATUS_COLO) {
1419        error_setg(errp, QERR_MIGRATION_ACTIVE);
1420        return -EINVAL;
1421    }
1422
1423    if (migration_is_blocked(errp)) {
1424        return -EINVAL;
1425    }
1426
1427    if (migrate_use_block()) {
1428        error_setg(errp, "Block migration and snapshots are incompatible");
1429        return -EINVAL;
1430    }
1431
1432    migrate_init(ms);
1433    ms->to_dst_file = f;
1434
1435    qemu_mutex_unlock_iothread();
1436    qemu_savevm_state_header(f);
1437    qemu_savevm_state_setup(f);
1438    qemu_mutex_lock_iothread();
1439
1440    while (qemu_file_get_error(f) == 0) {
1441        if (qemu_savevm_state_iterate(f, false) > 0) {
1442            break;
1443        }
1444    }
1445
1446    ret = qemu_file_get_error(f);
1447    if (ret == 0) {
1448        qemu_savevm_state_complete_precopy(f, false, false);
1449        ret = qemu_file_get_error(f);
1450    }
1451    qemu_savevm_state_cleanup();
1452    if (ret != 0) {
1453        error_setg_errno(errp, -ret, "Error while writing VM state");
1454    }
1455
1456    if (ret != 0) {
1457        status = MIGRATION_STATUS_FAILED;
1458    } else {
1459        status = MIGRATION_STATUS_COMPLETED;
1460    }
1461    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
1462
1463    /* f is outer parameter, it should not stay in global migration state after
1464     * this function finished */
1465    ms->to_dst_file = NULL;
1466
1467    return ret;
1468}
1469
1470void qemu_savevm_live_state(QEMUFile *f)
1471{
1472    /* save QEMU_VM_SECTION_END section */
1473    qemu_savevm_state_complete_precopy(f, true, false);
1474    qemu_put_byte(f, QEMU_VM_EOF);
1475}
1476
1477int qemu_save_device_state(QEMUFile *f)
1478{
1479    SaveStateEntry *se;
1480
1481    if (!migration_in_colo_state()) {
1482        qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1483        qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1484    }
1485    cpu_synchronize_all_states();
1486
1487    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1488        int ret;
1489
1490        if (se->is_ram) {
1491            continue;
1492        }
1493        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1494            continue;
1495        }
1496        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1497            continue;
1498        }
1499
1500        save_section_header(f, se, QEMU_VM_SECTION_FULL);
1501
1502        ret = vmstate_save(f, se, NULL);
1503        if (ret) {
1504            return ret;
1505        }
1506
1507        save_section_footer(f, se);
1508    }
1509
1510    qemu_put_byte(f, QEMU_VM_EOF);
1511
1512    return qemu_file_get_error(f);
1513}
1514
1515static SaveStateEntry *find_se(const char *idstr, int instance_id)
1516{
1517    SaveStateEntry *se;
1518
1519    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1520        if (!strcmp(se->idstr, idstr) &&
1521            (instance_id == se->instance_id ||
1522             instance_id == se->alias_id))
1523            return se;
1524        /* Migrating from an older version? */
1525        if (strstr(se->idstr, idstr) && se->compat) {
1526            if (!strcmp(se->compat->idstr, idstr) &&
1527                (instance_id == se->compat->instance_id ||
1528                 instance_id == se->alias_id))
1529                return se;
1530        }
1531    }
1532    return NULL;
1533}
1534
/* Positive return codes a load handler may use in place of 0 / -errno */
enum LoadVMExitCodes {
    /* Lets a command quit every layer of nested loadvm loops at once */
    LOADVM_QUIT = 1,
};
1539
1540/* ------ incoming postcopy messages ------ */
1541/* 'advise' arrives before any transfers just to tell us that a postcopy
1542 * *might* happen - it might be skipped if precopy transferred everything
1543 * quickly.
1544 */
1545static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
1546                                         uint16_t len)
1547{
1548    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1549    uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
1550    Error *local_err = NULL;
1551
1552    trace_loadvm_postcopy_handle_advise();
1553    if (ps != POSTCOPY_INCOMING_NONE) {
1554        error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
1555        return -1;
1556    }
1557
1558    switch (len) {
1559    case 0:
1560        if (migrate_postcopy_ram()) {
1561            error_report("RAM postcopy is enabled but have 0 byte advise");
1562            return -EINVAL;
1563        }
1564        return 0;
1565    case 8 + 8:
1566        if (!migrate_postcopy_ram()) {
1567            error_report("RAM postcopy is disabled but have 16 byte advise");
1568            return -EINVAL;
1569        }
1570        break;
1571    default:
1572        error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
1573        return -EINVAL;
1574    }
1575
1576    if (!postcopy_ram_supported_by_host(mis)) {
1577        postcopy_state_set(POSTCOPY_INCOMING_NONE);
1578        return -1;
1579    }
1580
1581    remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
1582    local_pagesize_summary = ram_pagesize_summary();
1583
1584    if (remote_pagesize_summary != local_pagesize_summary)  {
1585        /*
1586         * This detects two potential causes of mismatch:
1587         *   a) A mismatch in host page sizes
1588         *      Some combinations of mismatch are probably possible but it gets
1589         *      a bit more complicated.  In particular we need to place whole
1590         *      host pages on the dest at once, and we need to ensure that we
1591         *      handle dirtying to make sure we never end up sending part of
1592         *      a hostpage on it's own.
1593         *   b) The use of different huge page sizes on source/destination
1594         *      a more fine grain test is performed during RAM block migration
1595         *      but this test here causes a nice early clear failure, and
1596         *      also fails when passed to an older qemu that doesn't
1597         *      do huge pages.
1598         */
1599        error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
1600                                                             " d=%" PRIx64 ")",
1601                     remote_pagesize_summary, local_pagesize_summary);
1602        return -1;
1603    }
1604
1605    remote_tps = qemu_get_be64(mis->from_src_file);
1606    if (remote_tps != qemu_target_page_size()) {
1607        /*
1608         * Again, some differences could be dealt with, but for now keep it
1609         * simple.
1610         */
1611        error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
1612                     (int)remote_tps, qemu_target_page_size());
1613        return -1;
1614    }
1615
1616    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
1617        error_report_err(local_err);
1618        return -1;
1619    }
1620
1621    if (ram_postcopy_incoming_init(mis)) {
1622        return -1;
1623    }
1624
1625    postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1626
1627    return 0;
1628}
1629
1630/* After postcopy we will be told to throw some pages away since they're
1631 * dirty and will have to be demand fetched.  Must happen before CPU is
1632 * started.
1633 * There can be 0..many of these messages, each encoding multiple pages.
1634 */
1635static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
1636                                              uint16_t len)
1637{
1638    int tmp;
1639    char ramid[256];
1640    PostcopyState ps = postcopy_state_get();
1641
1642    trace_loadvm_postcopy_ram_handle_discard();
1643
1644    switch (ps) {
1645    case POSTCOPY_INCOMING_ADVISE:
1646        /* 1st discard */
1647        tmp = postcopy_ram_prepare_discard(mis);
1648        if (tmp) {
1649            return tmp;
1650        }
1651        break;
1652
1653    case POSTCOPY_INCOMING_DISCARD:
1654        /* Expected state */
1655        break;
1656
1657    default:
1658        error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
1659                     ps);
1660        return -1;
1661    }
1662    /* We're expecting a
1663     *    Version (0)
1664     *    a RAM ID string (length byte, name, 0 term)
1665     *    then at least 1 16 byte chunk
1666    */
1667    if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
1668        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1669        return -1;
1670    }
1671
1672    tmp = qemu_get_byte(mis->from_src_file);
1673    if (tmp != postcopy_ram_discard_version) {
1674        error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
1675        return -1;
1676    }
1677
1678    if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
1679        error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
1680        return -1;
1681    }
1682    tmp = qemu_get_byte(mis->from_src_file);
1683    if (tmp != 0) {
1684        error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
1685        return -1;
1686    }
1687
1688    len -= 3 + strlen(ramid);
1689    if (len % 16) {
1690        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1691        return -1;
1692    }
1693    trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
1694    while (len) {
1695        uint64_t start_addr, block_length;
1696        start_addr = qemu_get_be64(mis->from_src_file);
1697        block_length = qemu_get_be64(mis->from_src_file);
1698
1699        len -= 16;
1700        int ret = ram_discard_range(ramid, start_addr, block_length);
1701        if (ret) {
1702            return ret;
1703        }
1704    }
1705    trace_loadvm_postcopy_ram_handle_discard_end();
1706
1707    return 0;
1708}
1709
1710/*
1711 * Triggered by a postcopy_listen command; this thread takes over reading
1712 * the input stream, leaving the main thread free to carry on loading the rest
1713 * of the device state (from RAM).
1714 * (TODO:This could do with being in a postcopy file - but there again it's
1715 * just another input loop, not that postcopy specific)
1716 */
1717static void *postcopy_ram_listen_thread(void *opaque)
1718{
1719    MigrationIncomingState *mis = migration_incoming_get_current();
1720    QEMUFile *f = mis->from_src_file;
1721    int load_res;
1722
1723    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
1724                                   MIGRATION_STATUS_POSTCOPY_ACTIVE);
1725    qemu_sem_post(&mis->listen_thread_sem);
1726    trace_postcopy_ram_listen_thread_start();
1727
1728    rcu_register_thread();
1729    /*
1730     * Because we're a thread and not a coroutine we can't yield
1731     * in qemu_file, and thus we must be blocking now.
1732     */
1733    qemu_file_set_blocking(f, true);
1734    load_res = qemu_loadvm_state_main(f, mis);
1735
1736    /*
1737     * This is tricky, but, mis->from_src_file can change after it
1738     * returns, when postcopy recovery happened. In the future, we may
1739     * want a wrapper for the QEMUFile handle.
1740     */
1741    f = mis->from_src_file;
1742
1743    /* And non-blocking again so we don't block in any cleanup */
1744    qemu_file_set_blocking(f, false);
1745
1746    trace_postcopy_ram_listen_thread_exit();
1747    if (load_res < 0) {
1748        error_report("%s: loadvm failed: %d", __func__, load_res);
1749        qemu_file_set_error(f, load_res);
1750        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1751                                       MIGRATION_STATUS_FAILED);
1752    } else {
1753        /*
1754         * This looks good, but it's possible that the device loading in the
1755         * main thread hasn't finished yet, and so we might not be in 'RUN'
1756         * state yet; wait for the end of the main thread.
1757         */
1758        qemu_event_wait(&mis->main_thread_load_event);
1759    }
1760    postcopy_ram_incoming_cleanup(mis);
1761
1762    if (load_res < 0) {
1763        /*
1764         * If something went wrong then we have a bad state so exit;
1765         * depending how far we got it might be possible at this point
1766         * to leave the guest running and fire MCEs for pages that never
1767         * arrived as a desperate recovery step.
1768         */
1769        rcu_unregister_thread();
1770        exit(EXIT_FAILURE);
1771    }
1772
1773    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1774                                   MIGRATION_STATUS_COMPLETED);
1775    /*
1776     * If everything has worked fine, then the main thread has waited
1777     * for us to start, and we're the last use of the mis.
1778     * (If something broke then qemu will have to exit anyway since it's
1779     * got a bad migration state).
1780     */
1781    migration_incoming_state_destroy();
1782    qemu_loadvm_state_cleanup();
1783
1784    rcu_unregister_thread();
1785    mis->have_listen_thread = false;
1786    return NULL;
1787}
1788
1789/* After this message we must be able to immediately receive postcopy data */
1790static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
1791{
1792    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
1793    trace_loadvm_postcopy_handle_listen();
1794    Error *local_err = NULL;
1795
1796    if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
1797        error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
1798        return -1;
1799    }
1800    if (ps == POSTCOPY_INCOMING_ADVISE) {
1801        /*
1802         * A rare case, we entered listen without having to do any discards,
1803         * so do the setup that's normally done at the time of the 1st discard.
1804         */
1805        if (migrate_postcopy_ram()) {
1806            postcopy_ram_prepare_discard(mis);
1807        }
1808    }
1809
1810    /*
1811     * Sensitise RAM - can now generate requests for blocks that don't exist
1812     * However, at this point the CPU shouldn't be running, and the IO
1813     * shouldn't be doing anything yet so don't actually expect requests
1814     */
1815    if (migrate_postcopy_ram()) {
1816        if (postcopy_ram_enable_notify(mis)) {
1817            postcopy_ram_incoming_cleanup(mis);
1818            return -1;
1819        }
1820    }
1821
1822    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
1823        error_report_err(local_err);
1824        return -1;
1825    }
1826
1827    if (mis->have_listen_thread) {
1828        error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
1829        return -1;
1830    }
1831
1832    mis->have_listen_thread = true;
1833    /* Start up the listening thread and wait for it to signal ready */
1834    qemu_sem_init(&mis->listen_thread_sem, 0);
1835    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
1836                       postcopy_ram_listen_thread, NULL,
1837                       QEMU_THREAD_DETACHED);
1838    qemu_sem_wait(&mis->listen_thread_sem);
1839    qemu_sem_destroy(&mis->listen_thread_sem);
1840
1841    return 0;
1842}
1843
1844
1845typedef struct {
1846    QEMUBH *bh;
1847} HandleRunBhData;
1848
1849static void loadvm_postcopy_handle_run_bh(void *opaque)
1850{
1851    Error *local_err = NULL;
1852    HandleRunBhData *data = opaque;
1853    MigrationIncomingState *mis = migration_incoming_get_current();
1854
1855    /* TODO we should move all of this lot into postcopy_ram.c or a shared code
1856     * in migration.c
1857     */
1858    cpu_synchronize_all_post_init();
1859
1860    qemu_announce_self(&mis->announce_timer, migrate_announce_params());
1861
1862    /* Make sure all file formats flush their mutable metadata.
1863     * If we get an error here, just don't restart the VM yet. */
1864    bdrv_invalidate_cache_all(&local_err);
1865    if (local_err) {
1866        error_report_err(local_err);
1867        local_err = NULL;
1868        autostart = false;
1869    }
1870
1871    trace_loadvm_postcopy_handle_run_cpu_sync();
1872    cpu_synchronize_all_post_init();
1873
1874    trace_loadvm_postcopy_handle_run_vmstart();
1875
1876    dirty_bitmap_mig_before_vm_start();
1877
1878    if (autostart) {
1879        /* Hold onto your hats, starting the CPU */
1880        vm_start();
1881    } else {
1882        /* leave it paused and let management decide when to start the CPU */
1883        runstate_set(RUN_STATE_PAUSED);
1884    }
1885
1886    qemu_bh_delete(data->bh);
1887    g_free(data);
1888}
1889
1890/* After all discards we can start running and asking for pages */
1891static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
1892{
1893    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
1894    HandleRunBhData *data;
1895
1896    trace_loadvm_postcopy_handle_run();
1897    if (ps != POSTCOPY_INCOMING_LISTENING) {
1898        error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
1899        return -1;
1900    }
1901
1902    data = g_new(HandleRunBhData, 1);
1903    data->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, data);
1904    qemu_bh_schedule(data->bh);
1905
1906    /* We need to finish reading the stream from the package
1907     * and also stop reading anything more from the stream that loaded the
1908     * package (since it's now being read by the listener thread).
1909     * LOADVM_QUIT will quit all the layers of nested loadvm loops.
1910     */
1911    return LOADVM_QUIT;
1912}
1913
1914static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
1915{
1916    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
1917        error_report("%s: illegal resume received", __func__);
1918        /* Don't fail the load, only for this. */
1919        return 0;
1920    }
1921
1922    /*
1923     * This means source VM is ready to resume the postcopy migration.
1924     * It's time to switch state and release the fault thread to
1925     * continue service page faults.
1926     */
1927    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
1928                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
1929    qemu_sem_post(&mis->postcopy_pause_sem_fault);
1930
1931    trace_loadvm_postcopy_handle_resume();
1932
1933    /* Tell source that "we are ready" */
1934    migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);
1935
1936    return 0;
1937}
1938
1939/**
1940 * Immediately following this command is a blob of data containing an embedded
1941 * chunk of migration stream; read it and load it.
1942 *
1943 * @mis: Incoming state
1944 * @length: Length of packaged data to read
1945 *
1946 * Returns: Negative values on error
1947 *
1948 */
1949static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
1950{
1951    int ret;
1952    size_t length;
1953    QIOChannelBuffer *bioc;
1954
1955    length = qemu_get_be32(mis->from_src_file);
1956    trace_loadvm_handle_cmd_packaged(length);
1957
1958    if (length > MAX_VM_CMD_PACKAGED_SIZE) {
1959        error_report("Unreasonably large packaged state: %zu", length);
1960        return -1;
1961    }
1962
1963    bioc = qio_channel_buffer_new(length);
1964    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
1965    ret = qemu_get_buffer(mis->from_src_file,
1966                          bioc->data,
1967                          length);
1968    if (ret != length) {
1969        object_unref(OBJECT(bioc));
1970        error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
1971                     ret, length);
1972        return (ret < 0) ? ret : -EAGAIN;
1973    }
1974    bioc->usage += length;
1975    trace_loadvm_handle_cmd_packaged_received(ret);
1976
1977    QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
1978
1979    ret = qemu_loadvm_state_main(packf, mis);
1980    trace_loadvm_handle_cmd_packaged_main(ret);
1981    qemu_fclose(packf);
1982    object_unref(OBJECT(bioc));
1983
1984    return ret;
1985}
1986
1987/*
1988 * Handle request that source requests for recved_bitmap on
1989 * destination. Payload format:
1990 *
1991 * len (1 byte) + ramblock_name (<255 bytes)
1992 */
1993static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
1994                                     uint16_t len)
1995{
1996    QEMUFile *file = mis->from_src_file;
1997    RAMBlock *rb;
1998    char block_name[256];
1999    size_t cnt;
2000
2001    cnt = qemu_get_counted_string(file, block_name);
2002    if (!cnt) {
2003        error_report("%s: failed to read block name", __func__);
2004        return -EINVAL;
2005    }
2006
2007    /* Validate before using the data */
2008    if (qemu_file_get_error(file)) {
2009        return qemu_file_get_error(file);
2010    }
2011
2012    if (len != cnt + 1) {
2013        error_report("%s: invalid payload length (%d)", __func__, len);
2014        return -EINVAL;
2015    }
2016
2017    rb = qemu_ram_block_by_name(block_name);
2018    if (!rb) {
2019        error_report("%s: block '%s' not found", __func__, block_name);
2020        return -EINVAL;
2021    }
2022
2023    migrate_send_rp_recv_bitmap(mis, block_name);
2024
2025    trace_loadvm_handle_recv_bitmap(block_name);
2026
2027    return 0;
2028}
2029
2030static int loadvm_process_enable_colo(MigrationIncomingState *mis)
2031{
2032    migration_incoming_enable_colo();
2033    return colo_init_ram_cache();
2034}
2035
2036/*
2037 * Process an incoming 'QEMU_VM_COMMAND'
2038 * 0           just a normal return
2039 * LOADVM_QUIT All good, but exit the loop
2040 * <0          Error
2041 */
2042static int loadvm_process_command(QEMUFile *f)
2043{
2044    MigrationIncomingState *mis = migration_incoming_get_current();
2045    uint16_t cmd;
2046    uint16_t len;
2047    uint32_t tmp32;
2048
2049    cmd = qemu_get_be16(f);
2050    len = qemu_get_be16(f);
2051
2052    /* Check validity before continue processing of cmds */
2053    if (qemu_file_get_error(f)) {
2054        return qemu_file_get_error(f);
2055    }
2056
2057    trace_loadvm_process_command(cmd, len);
2058    if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
2059        error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
2060        return -EINVAL;
2061    }
2062
2063    if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
2064        error_report("%s received with bad length - expecting %zu, got %d",
2065                     mig_cmd_args[cmd].name,
2066                     (size_t)mig_cmd_args[cmd].len, len);
2067        return -ERANGE;
2068    }
2069
2070    switch (cmd) {
2071    case MIG_CMD_OPEN_RETURN_PATH:
2072        if (mis->to_src_file) {
2073            error_report("CMD_OPEN_RETURN_PATH called when RP already open");
2074            /* Not really a problem, so don't give up */
2075            return 0;
2076        }
2077        mis->to_src_file = qemu_file_get_return_path(f);
2078        if (!mis->to_src_file) {
2079            error_report("CMD_OPEN_RETURN_PATH failed");
2080            return -1;
2081        }
2082        break;
2083
2084    case MIG_CMD_PING:
2085        tmp32 = qemu_get_be32(f);
2086        trace_loadvm_process_command_ping(tmp32);
2087        if (!mis->to_src_file) {
2088            error_report("CMD_PING (0x%x) received with no return path",
2089                         tmp32);
2090            return -1;
2091        }
2092        migrate_send_rp_pong(mis, tmp32);
2093        break;
2094
2095    case MIG_CMD_PACKAGED:
2096        return loadvm_handle_cmd_packaged(mis);
2097
2098    case MIG_CMD_POSTCOPY_ADVISE:
2099        return loadvm_postcopy_handle_advise(mis, len);
2100
2101    case MIG_CMD_POSTCOPY_LISTEN:
2102        return loadvm_postcopy_handle_listen(mis);
2103
2104    case MIG_CMD_POSTCOPY_RUN:
2105        return loadvm_postcopy_handle_run(mis);
2106
2107    case MIG_CMD_POSTCOPY_RAM_DISCARD:
2108        return loadvm_postcopy_ram_handle_discard(mis, len);
2109
2110    case MIG_CMD_POSTCOPY_RESUME:
2111        return loadvm_postcopy_handle_resume(mis);
2112
2113    case MIG_CMD_RECV_BITMAP:
2114        return loadvm_handle_recv_bitmap(mis, len);
2115
2116    case MIG_CMD_ENABLE_COLO:
2117        return loadvm_process_enable_colo(mis);
2118    }
2119
2120    return 0;
2121}
2122
2123/*
2124 * Read a footer off the wire and check that it matches the expected section
2125 *
2126 * Returns: true if the footer was good
2127 *          false if there is a problem (and calls error_report to say why)
2128 */
2129static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
2130{
2131    int ret;
2132    uint8_t read_mark;
2133    uint32_t read_section_id;
2134
2135    if (!migrate_get_current()->send_section_footer) {
2136        /* No footer to check */
2137        return true;
2138    }
2139
2140    read_mark = qemu_get_byte(f);
2141
2142    ret = qemu_file_get_error(f);
2143    if (ret) {
2144        error_report("%s: Read section footer failed: %d",
2145                     __func__, ret);
2146        return false;
2147    }
2148
2149    if (read_mark != QEMU_VM_SECTION_FOOTER) {
2150        error_report("Missing section footer for %s", se->idstr);
2151        return false;
2152    }
2153
2154    read_section_id = qemu_get_be32(f);
2155    if (read_section_id != se->load_section_id) {
2156        error_report("Mismatched section id in footer for %s -"
2157                     " read 0x%x expected 0x%x",
2158                     se->idstr, read_section_id, se->load_section_id);
2159        return false;
2160    }
2161
2162    /* All good */
2163    return true;
2164}
2165
2166static int
2167qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
2168{
2169    uint32_t instance_id, version_id, section_id;
2170    SaveStateEntry *se;
2171    char idstr[256];
2172    int ret;
2173
2174    /* Read section start */
2175    section_id = qemu_get_be32(f);
2176    if (!qemu_get_counted_string(f, idstr)) {
2177        error_report("Unable to read ID string for section %u",
2178                     section_id);
2179        return -EINVAL;
2180    }
2181    instance_id = qemu_get_be32(f);
2182    version_id = qemu_get_be32(f);
2183
2184    ret = qemu_file_get_error(f);
2185    if (ret) {
2186        error_report("%s: Failed to read instance/version ID: %d",
2187                     __func__, ret);
2188        return ret;
2189    }
2190
2191    trace_qemu_loadvm_state_section_startfull(section_id, idstr,
2192            instance_id, version_id);
2193    /* Find savevm section */
2194    se = find_se(idstr, instance_id);
2195    if (se == NULL) {
2196        error_report("Unknown savevm section or instance '%s' %d. "
2197                     "Make sure that your current VM setup matches your "
2198                     "saved VM setup, including any hotplugged devices",
2199                     idstr, instance_id);
2200        return -EINVAL;
2201    }
2202
2203    /* Validate version */
2204    if (version_id > se->version_id) {
2205        error_report("savevm: unsupported version %d for '%s' v%d",
2206                     version_id, idstr, se->version_id);
2207        return -EINVAL;
2208    }
2209    se->load_version_id = version_id;
2210    se->load_section_id = section_id;
2211
2212    /* Validate if it is a device's state */
2213    if (xen_enabled() && se->is_ram) {
2214        error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
2215        return -EINVAL;
2216    }
2217
2218    ret = vmstate_load(f, se);
2219    if (ret < 0) {
2220        error_report("error while loading state for instance 0x%x of"
2221                     " device '%s'", instance_id, idstr);
2222        return ret;
2223    }
2224    if (!check_section_footer(f, se)) {
2225        return -EINVAL;
2226    }
2227
2228    return 0;
2229}
2230
2231static int
2232qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
2233{
2234    uint32_t section_id;
2235    SaveStateEntry *se;
2236    int ret;
2237
2238    section_id = qemu_get_be32(f);
2239
2240    ret = qemu_file_get_error(f);
2241    if (ret) {
2242        error_report("%s: Failed to read section ID: %d",
2243                     __func__, ret);
2244        return ret;
2245    }
2246
2247    trace_qemu_loadvm_state_section_partend(section_id);
2248    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2249        if (se->load_section_id == section_id) {
2250            break;
2251        }
2252    }
2253    if (se == NULL) {
2254        error_report("Unknown savevm section %d", section_id);
2255        return -EINVAL;
2256    }
2257
2258    ret = vmstate_load(f, se);
2259    if (ret < 0) {
2260        error_report("error while loading state section id %d(%s)",
2261                     section_id, se->idstr);
2262        return ret;
2263    }
2264    if (!check_section_footer(f, se)) {
2265        return -EINVAL;
2266    }
2267
2268    return 0;
2269}
2270
2271static int qemu_loadvm_state_setup(QEMUFile *f)
2272{
2273    SaveStateEntry *se;
2274    int ret;
2275
2276    trace_loadvm_state_setup();
2277    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2278        if (!se->ops || !se->ops->load_setup) {
2279            continue;
2280        }
2281        if (se->ops && se->ops->is_active) {
2282            if (!se->ops->is_active(se->opaque)) {
2283                continue;
2284            }
2285        }
2286
2287        ret = se->ops->load_setup(f, se->opaque);
2288        if (ret < 0) {
2289            qemu_file_set_error(f, ret);
2290            error_report("Load state of device %s failed", se->idstr);
2291            return ret;
2292        }
2293    }
2294    return 0;
2295}
2296
2297void qemu_loadvm_state_cleanup(void)
2298{
2299    SaveStateEntry *se;
2300
2301    trace_loadvm_state_cleanup();
2302    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2303        if (se->ops && se->ops->load_cleanup) {
2304            se->ops->load_cleanup(se->opaque);
2305        }
2306    }
2307}
2308
2309/* Return true if we should continue the migration, or false. */
2310static bool postcopy_pause_incoming(MigrationIncomingState *mis)
2311{
2312    trace_postcopy_pause_incoming();
2313
2314    /* Clear the triggered bit to allow one recovery */
2315    mis->postcopy_recover_triggered = false;
2316
2317    assert(mis->from_src_file);
2318    qemu_file_shutdown(mis->from_src_file);
2319    qemu_fclose(mis->from_src_file);
2320    mis->from_src_file = NULL;
2321
2322    assert(mis->to_src_file);
2323    qemu_file_shutdown(mis->to_src_file);
2324    qemu_mutex_lock(&mis->rp_mutex);
2325    qemu_fclose(mis->to_src_file);
2326    mis->to_src_file = NULL;
2327    qemu_mutex_unlock(&mis->rp_mutex);
2328
2329    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2330                      MIGRATION_STATUS_POSTCOPY_PAUSED);
2331
2332    /* Notify the fault thread for the invalidated file handle */
2333    postcopy_fault_thread_notify(mis);
2334
2335    error_report("Detected IO failure for postcopy. "
2336                 "Migration paused.");
2337
2338    while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
2339        qemu_sem_wait(&mis->postcopy_pause_sem_dst);
2340    }
2341
2342    trace_postcopy_pause_incoming_continued();
2343
2344    return true;
2345}
2346
2347int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
2348{
2349    uint8_t section_type;
2350    int ret = 0;
2351
2352retry:
2353    while (true) {
2354        section_type = qemu_get_byte(f);
2355
2356        if (qemu_file_get_error(f)) {
2357            ret = qemu_file_get_error(f);
2358            break;
2359        }
2360
2361        trace_qemu_loadvm_state_section(section_type);
2362        switch (section_type) {
2363        case QEMU_VM_SECTION_START:
2364        case QEMU_VM_SECTION_FULL:
2365            ret = qemu_loadvm_section_start_full(f, mis);
2366            if (ret < 0) {
2367                goto out;
2368            }
2369            break;
2370        case QEMU_VM_SECTION_PART:
2371        case QEMU_VM_SECTION_END:
2372            ret = qemu_loadvm_section_part_end(f, mis);
2373            if (ret < 0) {
2374                goto out;
2375            }
2376            break;
2377        case QEMU_VM_COMMAND:
2378            ret = loadvm_process_command(f);
2379            trace_qemu_loadvm_state_section_command(ret);
2380            if ((ret < 0) || (ret & LOADVM_QUIT)) {
2381                goto out;
2382            }
2383            break;
2384        case QEMU_VM_EOF:
2385            /* This is the end of migration */
2386            goto out;
2387        default:
2388            error_report("Unknown savevm section type %d", section_type);
2389            ret = -EINVAL;
2390            goto out;
2391        }
2392    }
2393
2394out:
2395    if (ret < 0) {
2396        qemu_file_set_error(f, ret);
2397
2398        /*
2399         * If we are during an active postcopy, then we pause instead
2400         * of bail out to at least keep the VM's dirty data.  Note
2401         * that POSTCOPY_INCOMING_LISTENING stage is still not enough,
2402         * during which we're still receiving device states and we
2403         * still haven't yet started the VM on destination.
2404         */
2405        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
2406            postcopy_pause_incoming(mis)) {
2407            /* Reset f to point to the newly created channel */
2408            f = mis->from_src_file;
2409            goto retry;
2410        }
2411    }
2412    return ret;
2413}
2414
2415int qemu_loadvm_state(QEMUFile *f)
2416{
2417    MigrationIncomingState *mis = migration_incoming_get_current();
2418    Error *local_err = NULL;
2419    unsigned int v;
2420    int ret;
2421
2422    if (qemu_savevm_state_blocked(&local_err)) {
2423        error_report_err(local_err);
2424        return -EINVAL;
2425    }
2426
2427    v = qemu_get_be32(f);
2428    if (v != QEMU_VM_FILE_MAGIC) {
2429        error_report("Not a migration stream");
2430        return -EINVAL;
2431    }
2432
2433    v = qemu_get_be32(f);
2434    if (v == QEMU_VM_FILE_VERSION_COMPAT) {
2435        error_report("SaveVM v2 format is obsolete and don't work anymore");
2436        return -ENOTSUP;
2437    }
2438    if (v != QEMU_VM_FILE_VERSION) {
2439        error_report("Unsupported migration stream version");
2440        return -ENOTSUP;
2441    }
2442
2443    if (qemu_loadvm_state_setup(f) != 0) {
2444        return -EINVAL;
2445    }
2446
2447    if (migrate_get_current()->send_configuration) {
2448        if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
2449            error_report("Configuration section missing");
2450            qemu_loadvm_state_cleanup();
2451            return -EINVAL;
2452        }
2453        ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
2454
2455        if (ret) {
2456            qemu_loadvm_state_cleanup();
2457            return ret;
2458        }
2459    }
2460
2461    cpu_synchronize_all_pre_loadvm();
2462
2463    ret = qemu_loadvm_state_main(f, mis);
2464    qemu_event_set(&mis->main_thread_load_event);
2465
2466    trace_qemu_loadvm_state_post_main(ret);
2467
2468    if (mis->have_listen_thread) {
2469        /* Listen thread still going, can't clean up yet */
2470        return ret;
2471    }
2472
2473    if (ret == 0) {
2474        ret = qemu_file_get_error(f);
2475    }
2476
2477    /*
2478     * Try to read in the VMDESC section as well, so that dumping tools that
2479     * intercept our migration stream have the chance to see it.
2480     */
2481
2482    /* We've got to be careful; if we don't read the data and just shut the fd
2483     * then the sender can error if we close while it's still sending.
2484     * We also mustn't read data that isn't there; some transports (RDMA)
2485     * will stall waiting for that data when the source has already closed.
2486     */
2487    if (ret == 0 && should_send_vmdesc()) {
2488        uint8_t *buf;
2489        uint32_t size;
2490        uint8_t  section_type = qemu_get_byte(f);
2491
2492        if (section_type != QEMU_VM_VMDESCRIPTION) {
2493            error_report("Expected vmdescription section, but got %d",
2494                         section_type);
2495            /*
2496             * It doesn't seem worth failing at this point since
2497             * we apparently have an otherwise valid VM state
2498             */
2499        } else {
2500            buf = g_malloc(0x1000);
2501            size = qemu_get_be32(f);
2502
2503            while (size > 0) {
2504                uint32_t read_chunk = MIN(size, 0x1000);
2505                qemu_get_buffer(f, buf, read_chunk);
2506                size -= read_chunk;
2507            }
2508            g_free(buf);
2509        }
2510    }
2511
2512    qemu_loadvm_state_cleanup();
2513    cpu_synchronize_all_post_init();
2514
2515    return ret;
2516}
2517
2518int qemu_load_device_state(QEMUFile *f)
2519{
2520    MigrationIncomingState *mis = migration_incoming_get_current();
2521    int ret;
2522
2523    /* Load QEMU_VM_SECTION_FULL section */
2524    ret = qemu_loadvm_state_main(f, mis);
2525    if (ret < 0) {
2526        error_report("Failed to load device state: %d", ret);
2527        return ret;
2528    }
2529
2530    cpu_synchronize_all_post_init();
2531    return 0;
2532}
2533
2534int save_snapshot(const char *name, Error **errp)
2535{
2536    BlockDriverState *bs, *bs1;
2537    QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
2538    int ret = -1;
2539    QEMUFile *f;
2540    int saved_vm_running;
2541    uint64_t vm_state_size;
2542    qemu_timeval tv;
2543    struct tm tm;
2544    AioContext *aio_context;
2545
2546    if (migration_is_blocked(errp)) {
2547        return false;
2548    }
2549
2550    if (!replay_can_snapshot()) {
2551        error_setg(errp, "Record/replay does not allow making snapshot "
2552                   "right now. Try once more later.");
2553        return ret;
2554    }
2555
2556    if (!bdrv_all_can_snapshot(&bs)) {
2557        error_setg(errp, "Device '%s' is writable but does not support "
2558                   "snapshots", bdrv_get_device_name(bs));
2559        return ret;
2560    }
2561
2562    /* Delete old snapshots of the same name */
2563    if (name) {
2564        ret = bdrv_all_delete_snapshot(name, &bs1, errp);
2565        if (ret < 0) {
2566            error_prepend(errp, "Error while deleting snapshot on device "
2567                          "'%s': ", bdrv_get_device_name(bs1));
2568            return ret;
2569        }
2570    }
2571
2572    bs = bdrv_all_find_vmstate_bs();
2573    if (bs == NULL) {
2574        error_setg(errp, "No block device can accept snapshots");
2575        return ret;
2576    }
2577    aio_context = bdrv_get_aio_context(bs);
2578
2579    saved_vm_running = runstate_is_running();
2580
2581    ret = global_state_store();
2582    if (ret) {
2583        error_setg(errp, "Error saving global state");
2584        return ret;
2585    }
2586    vm_stop(RUN_STATE_SAVE_VM);
2587
2588    bdrv_drain_all_begin();
2589
2590    aio_context_acquire(aio_context);
2591
2592    memset(sn, 0, sizeof(*sn));
2593
2594    /* fill auxiliary fields */
2595    qemu_gettimeofday(&tv);
2596    sn->date_sec = tv.tv_sec;
2597    sn->date_nsec = tv.tv_usec * 1000;
2598    sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
2599
2600    if (name) {
2601        ret = bdrv_snapshot_find(bs, old_sn, name);
2602        if (ret >= 0) {
2603            pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
2604            pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
2605        } else {
2606            pstrcpy(sn->name, sizeof(sn->name), name);
2607        }
2608    } else {
2609        /* cast below needed for OpenBSD where tv_sec is still 'long' */
2610        localtime_r((const time_t *)&tv.tv_sec, &tm);
2611        strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
2612    }
2613
2614    /* save the VM state */
2615    f = qemu_fopen_bdrv(bs, 1);
2616    if (!f) {
2617        error_setg(errp, "Could not open VM state file");
2618        goto the_end;
2619    }
2620    ret = qemu_savevm_state(f, errp);
2621    vm_state_size = qemu_ftell(f);
2622    qemu_fclose(f);
2623    if (ret < 0) {
2624        goto the_end;
2625    }
2626
2627    /* The bdrv_all_create_snapshot() call that follows acquires the AioContext
2628     * for itself.  BDRV_POLL_WHILE() does not support nested locking because
2629     * it only releases the lock once.  Therefore synchronous I/O will deadlock
2630     * unless we release the AioContext before bdrv_all_create_snapshot().
2631     */
2632    aio_context_release(aio_context);
2633    aio_context = NULL;
2634
2635    ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
2636    if (ret < 0) {
2637        error_setg(errp, "Error while creating snapshot on '%s'",
2638                   bdrv_get_device_name(bs));
2639        goto the_end;
2640    }
2641
2642    ret = 0;
2643
2644 the_end:
2645    if (aio_context) {
2646        aio_context_release(aio_context);
2647    }
2648
2649    bdrv_drain_all_end();
2650
2651    if (saved_vm_running) {
2652        vm_start();
2653    }
2654    return ret;
2655}
2656
2657void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
2658                                Error **errp)
2659{
2660    QEMUFile *f;
2661    QIOChannelFile *ioc;
2662    int saved_vm_running;
2663    int ret;
2664
2665    if (!has_live) {
2666        /* live default to true so old version of Xen tool stack can have a
2667         * successfull live migration */
2668        live = true;
2669    }
2670
2671    saved_vm_running = runstate_is_running();
2672    vm_stop(RUN_STATE_SAVE_VM);
2673    global_state_store_running();
2674
2675    ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT, 0660, errp);
2676    if (!ioc) {
2677        goto the_end;
2678    }
2679    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
2680    f = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
2681    object_unref(OBJECT(ioc));
2682    ret = qemu_save_device_state(f);
2683    if (ret < 0 || qemu_fclose(f) < 0) {
2684        error_setg(errp, QERR_IO_ERROR);
2685    } else {
2686        /* libxl calls the QMP command "stop" before calling
2687         * "xen-save-devices-state" and in case of migration failure, libxl
2688         * would call "cont".
2689         * So call bdrv_inactivate_all (release locks) here to let the other
2690         * side of the migration take controle of the images.
2691         */
2692        if (live && !saved_vm_running) {
2693            ret = bdrv_inactivate_all();
2694            if (ret) {
2695                error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
2696                           __func__, ret);
2697            }
2698        }
2699    }
2700
2701 the_end:
2702    if (saved_vm_running) {
2703        vm_start();
2704    }
2705}
2706
2707void qmp_xen_load_devices_state(const char *filename, Error **errp)
2708{
2709    QEMUFile *f;
2710    QIOChannelFile *ioc;
2711    int ret;
2712
2713    /* Guest must be paused before loading the device state; the RAM state
2714     * will already have been loaded by xc
2715     */
2716    if (runstate_is_running()) {
2717        error_setg(errp, "Cannot update device state while vm is running");
2718        return;
2719    }
2720    vm_stop(RUN_STATE_RESTORE_VM);
2721
2722    ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
2723    if (!ioc) {
2724        return;
2725    }
2726    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
2727    f = qemu_fopen_channel_input(QIO_CHANNEL(ioc));
2728    object_unref(OBJECT(ioc));
2729
2730    ret = qemu_loadvm_state(f);
2731    qemu_fclose(f);
2732    if (ret < 0) {
2733        error_setg(errp, QERR_IO_ERROR);
2734    }
2735    migration_incoming_state_destroy();
2736}
2737
2738int load_snapshot(const char *name, Error **errp)
2739{
2740    BlockDriverState *bs, *bs_vm_state;
2741    QEMUSnapshotInfo sn;
2742    QEMUFile *f;
2743    int ret;
2744    AioContext *aio_context;
2745    MigrationIncomingState *mis = migration_incoming_get_current();
2746
2747    if (!replay_can_snapshot()) {
2748        error_setg(errp, "Record/replay does not allow loading snapshot "
2749                   "right now. Try once more later.");
2750        return -EINVAL;
2751    }
2752
2753    if (!bdrv_all_can_snapshot(&bs)) {
2754        error_setg(errp,
2755                   "Device '%s' is writable but does not support snapshots",
2756                   bdrv_get_device_name(bs));
2757        return -ENOTSUP;
2758    }
2759    ret = bdrv_all_find_snapshot(name, &bs);
2760    if (ret < 0) {
2761        error_setg(errp,
2762                   "Device '%s' does not have the requested snapshot '%s'",
2763                   bdrv_get_device_name(bs), name);
2764        return ret;
2765    }
2766
2767    bs_vm_state = bdrv_all_find_vmstate_bs();
2768    if (!bs_vm_state) {
2769        error_setg(errp, "No block device supports snapshots");
2770        return -ENOTSUP;
2771    }
2772    aio_context = bdrv_get_aio_context(bs_vm_state);
2773
2774    /* Don't even try to load empty VM states */
2775    aio_context_acquire(aio_context);
2776    ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
2777    aio_context_release(aio_context);
2778    if (ret < 0) {
2779        return ret;
2780    } else if (sn.vm_state_size == 0) {
2781        error_setg(errp, "This is a disk-only snapshot. Revert to it "
2782                   " offline using qemu-img");
2783        return -EINVAL;
2784    }
2785
2786    /* Flush all IO requests so they don't interfere with the new state.  */
2787    bdrv_drain_all_begin();
2788
2789    ret = bdrv_all_goto_snapshot(name, &bs, errp);
2790    if (ret < 0) {
2791        error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
2792                      name, bdrv_get_device_name(bs));
2793        goto err_drain;
2794    }
2795
2796    /* restore the VM state */
2797    f = qemu_fopen_bdrv(bs_vm_state, 0);
2798    if (!f) {
2799        error_setg(errp, "Could not open VM state file");
2800        ret = -EINVAL;
2801        goto err_drain;
2802    }
2803
2804    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
2805    mis->from_src_file = f;
2806
2807    aio_context_acquire(aio_context);
2808    ret = qemu_loadvm_state(f);
2809    migration_incoming_state_destroy();
2810    aio_context_release(aio_context);
2811
2812    bdrv_drain_all_end();
2813
2814    if (ret < 0) {
2815        error_setg(errp, "Error %d while loading VM state", ret);
2816        return ret;
2817    }
2818
2819    return 0;
2820
2821err_drain:
2822    bdrv_drain_all_end();
2823    return ret;
2824}
2825
2826void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
2827{
2828    qemu_ram_set_idstr(mr->ram_block,
2829                       memory_region_name(mr), dev);
2830    qemu_ram_set_migratable(mr->ram_block);
2831}
2832
2833void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
2834{
2835    qemu_ram_unset_idstr(mr->ram_block);
2836    qemu_ram_unset_migratable(mr->ram_block);
2837}
2838
2839void vmstate_register_ram_global(MemoryRegion *mr)
2840{
2841    vmstate_register_ram(mr, NULL);
2842}
2843
2844bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
2845{
2846    /* check needed if --only-migratable is specified */
2847    if (!only_migratable) {
2848        return true;
2849    }
2850
2851    return !(vmsd && vmsd->unmigratable);
2852}
2853