qemu/migration/savevm.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2009-2015 Red Hat Inc
   6 *
   7 * Authors:
   8 *  Juan Quintela <quintela@redhat.com>
   9 *
  10 * Permission is hereby granted, free of charge, to any person obtaining a copy
  11 * of this software and associated documentation files (the "Software"), to deal
  12 * in the Software without restriction, including without limitation the rights
  13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 * copies of the Software, and to permit persons to whom the Software is
  15 * furnished to do so, subject to the following conditions:
  16 *
  17 * The above copyright notice and this permission notice shall be included in
  18 * all copies or substantial portions of the Software.
  19 *
  20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26 * THE SOFTWARE.
  27 */
  28
  29#include "qemu/osdep.h"
  30#include "hw/boards.h"
  31#include "hw/xen/xen.h"
  32#include "net/net.h"
  33#include "migration.h"
  34#include "migration/snapshot.h"
  35#include "migration/misc.h"
  36#include "migration/register.h"
  37#include "migration/global_state.h"
  38#include "ram.h"
  39#include "qemu-file-channel.h"
  40#include "qemu-file.h"
  41#include "savevm.h"
  42#include "postcopy-ram.h"
  43#include "qapi/error.h"
  44#include "qapi/qapi-commands-migration.h"
  45#include "qapi/qapi-commands-misc.h"
  46#include "qapi/qmp/qerror.h"
  47#include "qemu/error-report.h"
  48#include "sysemu/cpus.h"
  49#include "exec/memory.h"
  50#include "exec/target_page.h"
  51#include "trace.h"
  52#include "qemu/iov.h"
  53#include "block/snapshot.h"
  54#include "qemu/cutils.h"
  55#include "io/channel-buffer.h"
  56#include "io/channel-file.h"
  57#include "sysemu/replay.h"
  58#include "qjson.h"
  59#include "migration/colo.h"
  60#include "qemu/bitmap.h"
  61#include "net/announce.h"
  62
/*
 * Version number of the postcopy RAM discard format.
 * NOTE(review): presumably carried in MIG_CMD_POSTCOPY_RAM_DISCARD
 * messages; the users of this constant are not visible here -- confirm.
 */
const unsigned int postcopy_ram_discard_version = 0;
  64
  65/* Subcommands for QEMU_VM_COMMAND */
  66enum qemu_vm_cmd {
  67    MIG_CMD_INVALID = 0,   /* Must be 0 */
  68    MIG_CMD_OPEN_RETURN_PATH,  /* Tell the dest to open the Return path */
  69    MIG_CMD_PING,              /* Request a PONG on the RP */
  70
  71    MIG_CMD_POSTCOPY_ADVISE,       /* Prior to any page transfers, just
  72                                      warn we might want to do PC */
  73    MIG_CMD_POSTCOPY_LISTEN,       /* Start listening for incoming
  74                                      pages as it's running. */
  75    MIG_CMD_POSTCOPY_RUN,          /* Start execution */
  76
  77    MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
  78                                      were previously sent during
  79                                      precopy but are dirty. */
  80    MIG_CMD_PACKAGED,          /* Send a wrapped stream within this stream */
  81    MIG_CMD_ENABLE_COLO,       /* Enable COLO */
  82    MIG_CMD_POSTCOPY_RESUME,   /* resume postcopy on dest */
  83    MIG_CMD_RECV_BITMAP,       /* Request for recved bitmap on dst */
  84    MIG_CMD_MAX
  85};
  86
  87#define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
  88static struct mig_cmd_args {
  89    ssize_t     len; /* -1 = variable */
  90    const char *name;
  91} mig_cmd_args[] = {
  92    [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
  93    [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
  94    [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
  95    [MIG_CMD_POSTCOPY_ADVISE]  = { .len = -1, .name = "POSTCOPY_ADVISE" },
  96    [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
  97    [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
  98    [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
  99                                   .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
 100    [MIG_CMD_POSTCOPY_RESUME]  = { .len =  0, .name = "POSTCOPY_RESUME" },
 101    [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
 102    [MIG_CMD_RECV_BITMAP]      = { .len = -1, .name = "RECV_BITMAP" },
 103    [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
 104};
 105
 106/* Note for MIG_CMD_POSTCOPY_ADVISE:
 107 * The format of arguments is depending on postcopy mode:
 108 * - postcopy RAM only
 109 *   uint64_t host page size
 *   uint64_t target page size
 111 *
 112 * - postcopy RAM and postcopy dirty bitmaps
 113 *   format is the same as for postcopy RAM only
 114 *
 115 * - postcopy dirty bitmaps only
 116 *   Nothing. Command length field is 0.
 117 *
 118 * Be careful: adding a new postcopy entity with some other parameters should
 119 * not break format self-description ability. Good way is to introduce some
 120 * generic extendable format with an exception for two old entities.
 121 */
 122
 123/***********************************************************/
 124/* savevm/loadvm support */
 125
 126static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
 127                                   int64_t pos)
 128{
 129    int ret;
 130    QEMUIOVector qiov;
 131
 132    qemu_iovec_init_external(&qiov, iov, iovcnt);
 133    ret = bdrv_writev_vmstate(opaque, &qiov, pos);
 134    if (ret < 0) {
 135        return ret;
 136    }
 137
 138    return qiov.size;
 139}
 140
 141static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
 142                                size_t size)
 143{
 144    return bdrv_load_vmstate(opaque, buf, pos, size);
 145}
 146
 147static int bdrv_fclose(void *opaque)
 148{
 149    return bdrv_flush(opaque);
 150}
 151
 152static const QEMUFileOps bdrv_read_ops = {
 153    .get_buffer = block_get_buffer,
 154    .close =      bdrv_fclose
 155};
 156
 157static const QEMUFileOps bdrv_write_ops = {
 158    .writev_buffer  = block_writev_buffer,
 159    .close          = bdrv_fclose
 160};
 161
 162static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
 163{
 164    if (is_writable) {
 165        return qemu_fopen_ops(bs, &bdrv_write_ops);
 166    }
 167    return qemu_fopen_ops(bs, &bdrv_read_ops);
 168}
 169
 170
 171/* QEMUFile timer support.
 172 * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
 173 */
 174
 175void timer_put(QEMUFile *f, QEMUTimer *ts)
 176{
 177    uint64_t expire_time;
 178
 179    expire_time = timer_expire_time_ns(ts);
 180    qemu_put_be64(f, expire_time);
 181}
 182
 183void timer_get(QEMUFile *f, QEMUTimer *ts)
 184{
 185    uint64_t expire_time;
 186
 187    expire_time = qemu_get_be64(f);
 188    if (expire_time != -1) {
 189        timer_mod_ns(ts, expire_time);
 190    } else {
 191        timer_del(ts);
 192    }
 193}
 194
 195
 196/* VMState timer support.
 197 * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
 198 */
 199
 200static int get_timer(QEMUFile *f, void *pv, size_t size,
 201                     const VMStateField *field)
 202{
 203    QEMUTimer *v = pv;
 204    timer_get(f, v);
 205    return 0;
 206}
 207
 208static int put_timer(QEMUFile *f, void *pv, size_t size,
 209                     const VMStateField *field, QJSON *vmdesc)
 210{
 211    QEMUTimer *v = pv;
 212    timer_put(f, v);
 213
 214    return 0;
 215}
 216
 217const VMStateInfo vmstate_info_timer = {
 218    .name = "timer",
 219    .get  = get_timer,
 220    .put  = put_timer,
 221};
 222
 223
/*
 * Compatibility identity of a SaveStateEntry that was registered with a
 * device path: the registration routines store the bare id string
 * (without the device path prefix) and its instance id here.
 */
typedef struct CompatEntry {
    char idstr[256];
    int instance_id;
} CompatEntry;
 228
/*
 * One registered savevm/vmstate handler, linked into
 * savevm_state.handlers.
 */
typedef struct SaveStateEntry {
    QTAILQ_ENTRY(SaveStateEntry) entry;
    /* "<device path>/<name>" when a device path is available, else "<name>" */
    char idstr[256];
    int instance_id;
    int alias_id;
    int version_id;
    /* version id read from the stream */
    int load_version_id;
    /* allocated from savevm_state.global_section_id at registration */
    int section_id;
    /* section id read from the stream */
    int load_section_id;
    /* old-style handlers; used when vmsd is NULL */
    const SaveVMHandlers *ops;
    /* VMState description; NULL for old-style registrations */
    const VMStateDescription *vmsd;
    void *opaque;
    /* only allocated when the entry was registered with a device path */
    CompatEntry *compat;
    /* set to 1 by register_savevm_live() when ops->save_setup is provided */
    int is_ram;
} SaveStateEntry;
 246
/*
 * Global savevm state: the list of registered handlers plus the fields
 * migrated by the "configuration" vmstate section.
 */
typedef struct SaveState {
    QTAILQ_HEAD(, SaveStateEntry) handlers;
    /* next section id to hand out at registration time */
    int global_section_id;
    /* length of name in bytes */
    uint32_t len;
    /* machine type name; set from the machine class on save */
    const char *name;
    uint32_t target_page_bits;
    /* number of entries in capabilities */
    uint32_t caps_count;
    MigrationCapability *capabilities;
} SaveState;
 256
 257static SaveState savevm_state = {
 258    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
 259    .global_section_id = 0,
 260};
 261
 262static bool should_validate_capability(int capability)
 263{
 264    assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
 265    /* Validate only new capabilities to keep compatibility. */
 266    switch (capability) {
 267    case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
 268        return true;
 269    default:
 270        return false;
 271    }
 272}
 273
 274static uint32_t get_validatable_capabilities_count(void)
 275{
 276    MigrationState *s = migrate_get_current();
 277    uint32_t result = 0;
 278    int i;
 279    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
 280        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
 281            result++;
 282        }
 283    }
 284    return result;
 285}
 286
 287static int configuration_pre_save(void *opaque)
 288{
 289    SaveState *state = opaque;
 290    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
 291    MigrationState *s = migrate_get_current();
 292    int i, j;
 293
 294    state->len = strlen(current_name);
 295    state->name = current_name;
 296    state->target_page_bits = qemu_target_page_bits();
 297
 298    state->caps_count = get_validatable_capabilities_count();
 299    state->capabilities = g_renew(MigrationCapability, state->capabilities,
 300                                  state->caps_count);
 301    for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
 302        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
 303            state->capabilities[j++] = i;
 304        }
 305    }
 306
 307    return 0;
 308}
 309
 310static int configuration_pre_load(void *opaque)
 311{
 312    SaveState *state = opaque;
 313
 314    /* If there is no target-page-bits subsection it means the source
 315     * predates the variable-target-page-bits support and is using the
 316     * minimum possible value for this CPU.
 317     */
 318    state->target_page_bits = qemu_target_page_bits_min();
 319    return 0;
 320}
 321
 322static bool configuration_validate_capabilities(SaveState *state)
 323{
 324    bool ret = true;
 325    MigrationState *s = migrate_get_current();
 326    unsigned long *source_caps_bm;
 327    int i;
 328
 329    source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
 330    for (i = 0; i < state->caps_count; i++) {
 331        MigrationCapability capability = state->capabilities[i];
 332        set_bit(capability, source_caps_bm);
 333    }
 334
 335    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
 336        bool source_state, target_state;
 337        if (!should_validate_capability(i)) {
 338            continue;
 339        }
 340        source_state = test_bit(i, source_caps_bm);
 341        target_state = s->enabled_capabilities[i];
 342        if (source_state != target_state) {
 343            error_report("Capability %s is %s, but received capability is %s",
 344                         MigrationCapability_str(i),
 345                         target_state ? "on" : "off",
 346                         source_state ? "on" : "off");
 347            ret = false;
 348            /* Don't break here to report all failed capabilities */
 349        }
 350    }
 351
 352    g_free(source_caps_bm);
 353    return ret;
 354}
 355
 356static int configuration_post_load(void *opaque, int version_id)
 357{
 358    SaveState *state = opaque;
 359    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
 360
 361    if (strncmp(state->name, current_name, state->len) != 0) {
 362        error_report("Machine type received is '%.*s' and local is '%s'",
 363                     (int) state->len, state->name, current_name);
 364        return -EINVAL;
 365    }
 366
 367    if (state->target_page_bits != qemu_target_page_bits()) {
 368        error_report("Received TARGET_PAGE_BITS is %d but local is %d",
 369                     state->target_page_bits, qemu_target_page_bits());
 370        return -EINVAL;
 371    }
 372
 373    if (!configuration_validate_capabilities(state)) {
 374        return -EINVAL;
 375    }
 376
 377    return 0;
 378}
 379
 380static int get_capability(QEMUFile *f, void *pv, size_t size,
 381                          const VMStateField *field)
 382{
 383    MigrationCapability *capability = pv;
 384    char capability_str[UINT8_MAX + 1];
 385    uint8_t len;
 386    int i;
 387
 388    len = qemu_get_byte(f);
 389    qemu_get_buffer(f, (uint8_t *)capability_str, len);
 390    capability_str[len] = '\0';
 391    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
 392        if (!strcmp(MigrationCapability_str(i), capability_str)) {
 393            *capability = i;
 394            return 0;
 395        }
 396    }
 397    error_report("Received unknown capability %s", capability_str);
 398    return -EINVAL;
 399}
 400
 401static int put_capability(QEMUFile *f, void *pv, size_t size,
 402                          const VMStateField *field, QJSON *vmdesc)
 403{
 404    MigrationCapability *capability = pv;
 405    const char *capability_str = MigrationCapability_str(*capability);
 406    size_t len = strlen(capability_str);
 407    assert(len <= UINT8_MAX);
 408
 409    qemu_put_byte(f, len);
 410    qemu_put_buffer(f, (uint8_t *)capability_str, len);
 411    return 0;
 412}
 413
 414static const VMStateInfo vmstate_info_capability = {
 415    .name = "capability",
 416    .get  = get_capability,
 417    .put  = put_capability,
 418};
 419
 420/* The target-page-bits subsection is present only if the
 421 * target page size is not the same as the default (ie the
 422 * minimum page size for a variable-page-size guest CPU).
 423 * If it is present then it contains the actual target page
 424 * bits for the machine, and migration will fail if the
 425 * two ends don't agree about it.
 426 */
 427static bool vmstate_target_page_bits_needed(void *opaque)
 428{
 429    return qemu_target_page_bits()
 430        > qemu_target_page_bits_min();
 431}
 432
 433static const VMStateDescription vmstate_target_page_bits = {
 434    .name = "configuration/target-page-bits",
 435    .version_id = 1,
 436    .minimum_version_id = 1,
 437    .needed = vmstate_target_page_bits_needed,
 438    .fields = (VMStateField[]) {
 439        VMSTATE_UINT32(target_page_bits, SaveState),
 440        VMSTATE_END_OF_LIST()
 441    }
 442};
 443
 444static bool vmstate_capabilites_needed(void *opaque)
 445{
 446    return get_validatable_capabilities_count() > 0;
 447}
 448
 449static const VMStateDescription vmstate_capabilites = {
 450    .name = "configuration/capabilities",
 451    .version_id = 1,
 452    .minimum_version_id = 1,
 453    .needed = vmstate_capabilites_needed,
 454    .fields = (VMStateField[]) {
 455        VMSTATE_UINT32_V(caps_count, SaveState, 1),
 456        VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
 457                                    vmstate_info_capability,
 458                                    MigrationCapability),
 459        VMSTATE_END_OF_LIST()
 460    }
 461};
 462
 463static const VMStateDescription vmstate_configuration = {
 464    .name = "configuration",
 465    .version_id = 1,
 466    .pre_load = configuration_pre_load,
 467    .post_load = configuration_post_load,
 468    .pre_save = configuration_pre_save,
 469    .fields = (VMStateField[]) {
 470        VMSTATE_UINT32(len, SaveState),
 471        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
 472        VMSTATE_END_OF_LIST()
 473    },
 474    .subsections = (const VMStateDescription*[]) {
 475        &vmstate_target_page_bits,
 476        &vmstate_capabilites,
 477        NULL
 478    }
 479};
 480
/*
 * Forward declaration: dump_vmstate_vmsf() and dump_vmstate_vmsd() call
 * each other.
 */
static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection);
 484
 485static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
 486                              int indent)
 487{
 488    fprintf(out_file, "%*s{\n", indent, "");
 489    indent += 2;
 490    fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
 491    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
 492            field->version_id);
 493    fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
 494            field->field_exists ? "true" : "false");
 495    fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
 496    if (field->vmsd != NULL) {
 497        fprintf(out_file, ",\n");
 498        dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
 499    }
 500    fprintf(out_file, "\n%*s}", indent - 2, "");
 501}
 502
 503static void dump_vmstate_vmss(FILE *out_file,
 504                              const VMStateDescription **subsection,
 505                              int indent)
 506{
 507    if (*subsection != NULL) {
 508        dump_vmstate_vmsd(out_file, *subsection, indent, true);
 509    }
 510}
 511
 512static void dump_vmstate_vmsd(FILE *out_file,
 513                              const VMStateDescription *vmsd, int indent,
 514                              bool is_subsection)
 515{
 516    if (is_subsection) {
 517        fprintf(out_file, "%*s{\n", indent, "");
 518    } else {
 519        fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
 520    }
 521    indent += 2;
 522    fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
 523    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
 524            vmsd->version_id);
 525    fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
 526            vmsd->minimum_version_id);
 527    if (vmsd->fields != NULL) {
 528        const VMStateField *field = vmsd->fields;
 529        bool first;
 530
 531        fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
 532        first = true;
 533        while (field->name != NULL) {
 534            if (field->flags & VMS_MUST_EXIST) {
 535                /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
 536                field++;
 537                continue;
 538            }
 539            if (!first) {
 540                fprintf(out_file, ",\n");
 541            }
 542            dump_vmstate_vmsf(out_file, field, indent + 2);
 543            field++;
 544            first = false;
 545        }
 546        fprintf(out_file, "\n%*s]", indent, "");
 547    }
 548    if (vmsd->subsections != NULL) {
 549        const VMStateDescription **subsection = vmsd->subsections;
 550        bool first;
 551
 552        fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
 553        first = true;
 554        while (*subsection != NULL) {
 555            if (!first) {
 556                fprintf(out_file, ",\n");
 557            }
 558            dump_vmstate_vmss(out_file, subsection, indent + 2);
 559            subsection++;
 560            first = false;
 561        }
 562        fprintf(out_file, "\n%*s]", indent, "");
 563    }
 564    fprintf(out_file, "\n%*s}", indent - 2, "");
 565}
 566
 567static void dump_machine_type(FILE *out_file)
 568{
 569    MachineClass *mc;
 570
 571    mc = MACHINE_GET_CLASS(current_machine);
 572
 573    fprintf(out_file, "  \"vmschkmachine\": {\n");
 574    fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
 575    fprintf(out_file, "  },\n");
 576}
 577
 578void dump_vmstate_json_to_file(FILE *out_file)
 579{
 580    GSList *list, *elt;
 581    bool first;
 582
 583    fprintf(out_file, "{\n");
 584    dump_machine_type(out_file);
 585
 586    first = true;
 587    list = object_class_get_list(TYPE_DEVICE, true);
 588    for (elt = list; elt; elt = elt->next) {
 589        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
 590                                             TYPE_DEVICE);
 591        const char *name;
 592        int indent = 2;
 593
 594        if (!dc->vmsd) {
 595            continue;
 596        }
 597
 598        if (!first) {
 599            fprintf(out_file, ",\n");
 600        }
 601        name = object_class_get_name(OBJECT_CLASS(dc));
 602        fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
 603        indent += 2;
 604        fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
 605        fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
 606                dc->vmsd->version_id);
 607        fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
 608                dc->vmsd->minimum_version_id);
 609
 610        dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);
 611
 612        fprintf(out_file, "\n%*s}", indent - 2, "");
 613        first = false;
 614    }
 615    fprintf(out_file, "\n}\n");
 616    fclose(out_file);
 617}
 618
 619static int calculate_new_instance_id(const char *idstr)
 620{
 621    SaveStateEntry *se;
 622    int instance_id = 0;
 623
 624    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 625        if (strcmp(idstr, se->idstr) == 0
 626            && instance_id <= se->instance_id) {
 627            instance_id = se->instance_id + 1;
 628        }
 629    }
 630    return instance_id;
 631}
 632
 633static int calculate_compat_instance_id(const char *idstr)
 634{
 635    SaveStateEntry *se;
 636    int instance_id = 0;
 637
 638    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 639        if (!se->compat) {
 640            continue;
 641        }
 642
 643        if (strcmp(idstr, se->compat->idstr) == 0
 644            && instance_id <= se->compat->instance_id) {
 645            instance_id = se->compat->instance_id + 1;
 646        }
 647    }
 648    return instance_id;
 649}
 650
 651static inline MigrationPriority save_state_priority(SaveStateEntry *se)
 652{
 653    if (se->vmsd) {
 654        return se->vmsd->priority;
 655    }
 656    return MIG_PRI_DEFAULT;
 657}
 658
 659static void savevm_state_handler_insert(SaveStateEntry *nse)
 660{
 661    MigrationPriority priority = save_state_priority(nse);
 662    SaveStateEntry *se;
 663
 664    assert(priority <= MIG_PRI_MAX);
 665
 666    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 667        if (save_state_priority(se) < priority) {
 668            break;
 669        }
 670    }
 671
 672    if (se) {
 673        QTAILQ_INSERT_BEFORE(se, nse, entry);
 674    } else {
 675        QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
 676    }
 677}
 678
 679/* TODO: Individual devices generally have very little idea about the rest
 680   of the system, so instance_id should be removed/replaced.
 681   Meanwhile pass -1 as instance_id if you do not already have a clearly
 682   distinguishing id for all instances of your device class. */
 683int register_savevm_live(DeviceState *dev,
 684                         const char *idstr,
 685                         int instance_id,
 686                         int version_id,
 687                         const SaveVMHandlers *ops,
 688                         void *opaque)
 689{
 690    SaveStateEntry *se;
 691
 692    se = g_new0(SaveStateEntry, 1);
 693    se->version_id = version_id;
 694    se->section_id = savevm_state.global_section_id++;
 695    se->ops = ops;
 696    se->opaque = opaque;
 697    se->vmsd = NULL;
 698    /* if this is a live_savem then set is_ram */
 699    if (ops->save_setup != NULL) {
 700        se->is_ram = 1;
 701    }
 702
 703    if (dev) {
 704        char *id = qdev_get_dev_path(dev);
 705        if (id) {
 706            if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
 707                sizeof(se->idstr)) {
 708                error_report("Path too long for VMState (%s)", id);
 709                g_free(id);
 710                g_free(se);
 711
 712                return -1;
 713            }
 714            g_free(id);
 715
 716            se->compat = g_new0(CompatEntry, 1);
 717            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr);
 718            se->compat->instance_id = instance_id == -1 ?
 719                         calculate_compat_instance_id(idstr) : instance_id;
 720            instance_id = -1;
 721        }
 722    }
 723    pstrcat(se->idstr, sizeof(se->idstr), idstr);
 724
 725    if (instance_id == -1) {
 726        se->instance_id = calculate_new_instance_id(se->idstr);
 727    } else {
 728        se->instance_id = instance_id;
 729    }
 730    assert(!se->compat || se->instance_id == 0);
 731    savevm_state_handler_insert(se);
 732    return 0;
 733}
 734
 735void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
 736{
 737    SaveStateEntry *se, *new_se;
 738    char id[256] = "";
 739
 740    if (dev) {
 741        char *path = qdev_get_dev_path(dev);
 742        if (path) {
 743            pstrcpy(id, sizeof(id), path);
 744            pstrcat(id, sizeof(id), "/");
 745            g_free(path);
 746        }
 747    }
 748    pstrcat(id, sizeof(id), idstr);
 749
 750    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
 751        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
 752            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
 753            g_free(se->compat);
 754            g_free(se);
 755        }
 756    }
 757}
 758
 759int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
 760                                   const VMStateDescription *vmsd,
 761                                   void *opaque, int alias_id,
 762                                   int required_for_version,
 763                                   Error **errp)
 764{
 765    SaveStateEntry *se;
 766
 767    /* If this triggers, alias support can be dropped for the vmsd. */
 768    assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
 769
 770    se = g_new0(SaveStateEntry, 1);
 771    se->version_id = vmsd->version_id;
 772    se->section_id = savevm_state.global_section_id++;
 773    se->opaque = opaque;
 774    se->vmsd = vmsd;
 775    se->alias_id = alias_id;
 776
 777    if (dev) {
 778        char *id = qdev_get_dev_path(dev);
 779        if (id) {
 780            if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
 781                sizeof(se->idstr)) {
 782                error_setg(errp, "Path too long for VMState (%s)", id);
 783                g_free(id);
 784                g_free(se);
 785
 786                return -1;
 787            }
 788            g_free(id);
 789
 790            se->compat = g_new0(CompatEntry, 1);
 791            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
 792            se->compat->instance_id = instance_id == -1 ?
 793                         calculate_compat_instance_id(vmsd->name) : instance_id;
 794            instance_id = -1;
 795        }
 796    }
 797    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
 798
 799    if (instance_id == -1) {
 800        se->instance_id = calculate_new_instance_id(se->idstr);
 801    } else {
 802        se->instance_id = instance_id;
 803    }
 804    assert(!se->compat || se->instance_id == 0);
 805    savevm_state_handler_insert(se);
 806    return 0;
 807}
 808
 809void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
 810                        void *opaque)
 811{
 812    SaveStateEntry *se, *new_se;
 813
 814    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
 815        if (se->vmsd == vmsd && se->opaque == opaque) {
 816            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
 817            g_free(se->compat);
 818            g_free(se);
 819        }
 820    }
 821}
 822
 823static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
 824{
 825    trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
 826    if (!se->vmsd) {         /* Old style */
 827        return se->ops->load_state(f, se->opaque, se->load_version_id);
 828    }
 829    return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
 830}
 831
 832static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
 833{
 834    int64_t old_offset, size;
 835
 836    old_offset = qemu_ftell_fast(f);
 837    se->ops->save_state(f, se->opaque);
 838    size = qemu_ftell_fast(f) - old_offset;
 839
 840    if (vmdesc) {
 841        json_prop_int(vmdesc, "size", size);
 842        json_start_array(vmdesc, "fields");
 843        json_start_object(vmdesc, NULL);
 844        json_prop_str(vmdesc, "name", "data");
 845        json_prop_int(vmdesc, "size", size);
 846        json_prop_str(vmdesc, "type", "buffer");
 847        json_end_object(vmdesc);
 848        json_end_array(vmdesc);
 849    }
 850}
 851
 852static int vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
 853{
 854    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
 855    if (!se->vmsd) {
 856        vmstate_save_old_style(f, se, vmdesc);
 857        return 0;
 858    }
 859    return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 860}
 861
 862/*
 863 * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
 864 */
 865static void save_section_header(QEMUFile *f, SaveStateEntry *se,
 866                                uint8_t section_type)
 867{
 868    qemu_put_byte(f, section_type);
 869    qemu_put_be32(f, se->section_id);
 870
 871    if (section_type == QEMU_VM_SECTION_FULL ||
 872        section_type == QEMU_VM_SECTION_START) {
 873        /* ID string */
 874        size_t len = strlen(se->idstr);
 875        qemu_put_byte(f, len);
 876        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
 877
 878        qemu_put_be32(f, se->instance_id);
 879        qemu_put_be32(f, se->version_id);
 880    }
 881}
 882
 883/*
 884 * Write a footer onto device sections that catches cases misformatted device
 885 * sections.
 886 */
 887static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
 888{
 889    if (migrate_get_current()->send_section_footer) {
 890        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
 891        qemu_put_be32(f, se->section_id);
 892    }
 893}
 894
 895/**
 896 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
 897 *                           command and associated data.
 898 *
 899 * @f: File to send command on
 900 * @command: Command type to send
 901 * @len: Length of associated data
 902 * @data: Data associated with command.
 903 */
 904static void qemu_savevm_command_send(QEMUFile *f,
 905                                     enum qemu_vm_cmd command,
 906                                     uint16_t len,
 907                                     uint8_t *data)
 908{
 909    trace_savevm_command_send(command, len);
 910    qemu_put_byte(f, QEMU_VM_COMMAND);
 911    qemu_put_be16(f, (uint16_t)command);
 912    qemu_put_be16(f, len);
 913    qemu_put_buffer(f, data, len);
 914    qemu_fflush(f);
 915}
 916
 917void qemu_savevm_send_colo_enable(QEMUFile *f)
 918{
 919    trace_savevm_send_colo_enable();
 920    qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
 921}
 922
 923void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
 924{
 925    uint32_t buf;
 926
 927    trace_savevm_send_ping(value);
 928    buf = cpu_to_be32(value);
 929    qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
 930}
 931
 932void qemu_savevm_send_open_return_path(QEMUFile *f)
 933{
 934    trace_savevm_send_open_return_path();
 935    qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
 936}
 937
 938/* We have a buffer of data to send; we don't want that all to be loaded
 939 * by the command itself, so the command contains just the length of the
 940 * extra buffer that we then send straight after it.
 941 * TODO: Must be a better way to organise that
 942 *
 943 * Returns:
 944 *    0 on success
 945 *    -ve on error
 946 */
 947int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
 948{
 949    uint32_t tmp;
 950
 951    if (len > MAX_VM_CMD_PACKAGED_SIZE) {
 952        error_report("%s: Unreasonably large packaged state: %zu",
 953                     __func__, len);
 954        return -1;
 955    }
 956
 957    tmp = cpu_to_be32(len);
 958
 959    trace_qemu_savevm_send_packaged();
 960    qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
 961
 962    qemu_put_buffer(f, buf, len);
 963
 964    return 0;
 965}
 966
 967/* Send prior to any postcopy transfer */
 968void qemu_savevm_send_postcopy_advise(QEMUFile *f)
 969{
 970    if (migrate_postcopy_ram()) {
 971        uint64_t tmp[2];
 972        tmp[0] = cpu_to_be64(ram_pagesize_summary());
 973        tmp[1] = cpu_to_be64(qemu_target_page_size());
 974
 975        trace_qemu_savevm_send_postcopy_advise();
 976        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
 977                                 16, (uint8_t *)tmp);
 978    } else {
 979        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
 980    }
 981}
 982
 983/* Sent prior to starting the destination running in postcopy, discard pages
 984 * that have already been sent but redirtied on the source.
 985 * CMD_POSTCOPY_RAM_DISCARD consist of:
 986 *      byte   version (0)
 987 *      byte   Length of name field (not including 0)
 988 *  n x byte   RAM block name
 989 *      byte   0 terminator (just for safety)
 990 *  n x        Byte ranges within the named RAMBlock
 991 *      be64   Start of the range
 992 *      be64   Length
 993 *
 994 *  name:  RAMBlock name that these entries are part of
 995 *  len: Number of page entries
 996 *  start_list: 'len' addresses
 997 *  length_list: 'len' addresses
 998 *
 999 */
1000void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
1001                                           uint16_t len,
1002                                           uint64_t *start_list,
1003                                           uint64_t *length_list)
1004{
1005    uint8_t *buf;
1006    uint16_t tmplen;
1007    uint16_t t;
1008    size_t name_len = strlen(name);
1009
1010    trace_qemu_savevm_send_postcopy_ram_discard(name, len);
1011    assert(name_len < 256);
1012    buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
1013    buf[0] = postcopy_ram_discard_version;
1014    buf[1] = name_len;
1015    memcpy(buf + 2, name, name_len);
1016    tmplen = 2 + name_len;
1017    buf[tmplen++] = '\0';
1018
1019    for (t = 0; t < len; t++) {
1020        stq_be_p(buf + tmplen, start_list[t]);
1021        tmplen += 8;
1022        stq_be_p(buf + tmplen, length_list[t]);
1023        tmplen += 8;
1024    }
1025    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
1026    g_free(buf);
1027}
1028
1029/* Get the destination into a state where it can receive postcopy data. */
1030void qemu_savevm_send_postcopy_listen(QEMUFile *f)
1031{
1032    trace_savevm_send_postcopy_listen();
1033    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
1034}
1035
1036/* Kick the destination into running */
1037void qemu_savevm_send_postcopy_run(QEMUFile *f)
1038{
1039    trace_savevm_send_postcopy_run();
1040    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
1041}
1042
1043void qemu_savevm_send_postcopy_resume(QEMUFile *f)
1044{
1045    trace_savevm_send_postcopy_resume();
1046    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
1047}
1048
1049void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
1050{
1051    size_t len;
1052    char buf[256];
1053
1054    trace_savevm_send_recv_bitmap(block_name);
1055
1056    buf[0] = len = strlen(block_name);
1057    memcpy(buf + 1, block_name, len);
1058
1059    qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
1060}
1061
1062bool qemu_savevm_state_blocked(Error **errp)
1063{
1064    SaveStateEntry *se;
1065
1066    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1067        if (se->vmsd && se->vmsd->unmigratable) {
1068            error_setg(errp, "State blocked by non-migratable device '%s'",
1069                       se->idstr);
1070            return true;
1071        }
1072    }
1073    return false;
1074}
1075
1076void qemu_savevm_state_header(QEMUFile *f)
1077{
1078    trace_savevm_state_header();
1079    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1080    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1081
1082    if (migrate_get_current()->send_configuration) {
1083        qemu_put_byte(f, QEMU_VM_CONFIGURATION);
1084        vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
1085    }
1086}
1087
1088void qemu_savevm_state_setup(QEMUFile *f)
1089{
1090    SaveStateEntry *se;
1091    Error *local_err = NULL;
1092    int ret;
1093
1094    trace_savevm_state_setup();
1095    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1096        if (!se->ops || !se->ops->save_setup) {
1097            continue;
1098        }
1099        if (se->ops && se->ops->is_active) {
1100            if (!se->ops->is_active(se->opaque)) {
1101                continue;
1102            }
1103        }
1104        save_section_header(f, se, QEMU_VM_SECTION_START);
1105
1106        ret = se->ops->save_setup(f, se->opaque);
1107        save_section_footer(f, se);
1108        if (ret < 0) {
1109            qemu_file_set_error(f, ret);
1110            break;
1111        }
1112    }
1113
1114    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
1115        error_report_err(local_err);
1116    }
1117}
1118
1119int qemu_savevm_state_resume_prepare(MigrationState *s)
1120{
1121    SaveStateEntry *se;
1122    int ret;
1123
1124    trace_savevm_state_resume_prepare();
1125
1126    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1127        if (!se->ops || !se->ops->resume_prepare) {
1128            continue;
1129        }
1130        if (se->ops && se->ops->is_active) {
1131            if (!se->ops->is_active(se->opaque)) {
1132                continue;
1133            }
1134        }
1135        ret = se->ops->resume_prepare(s, se->opaque);
1136        if (ret < 0) {
1137            return ret;
1138        }
1139    }
1140
1141    return 0;
1142}
1143
1144/*
1145 * this function has three return values:
1146 *   negative: there was one error, and we have -errno.
1147 *   0 : We haven't finished, caller have to go again
1148 *   1 : We have finished, we can go to complete phase
1149 */
1150int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
1151{
1152    SaveStateEntry *se;
1153    int ret = 1;
1154
1155    trace_savevm_state_iterate();
1156    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1157        if (!se->ops || !se->ops->save_live_iterate) {
1158            continue;
1159        }
1160        if (se->ops->is_active &&
1161            !se->ops->is_active(se->opaque)) {
1162            continue;
1163        }
1164        if (se->ops->is_active_iterate &&
1165            !se->ops->is_active_iterate(se->opaque)) {
1166            continue;
1167        }
1168        /*
1169         * In the postcopy phase, any device that doesn't know how to
1170         * do postcopy should have saved it's state in the _complete
1171         * call that's already run, it might get confused if we call
1172         * iterate afterwards.
1173         */
1174        if (postcopy &&
1175            !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
1176            continue;
1177        }
1178        if (qemu_file_rate_limit(f)) {
1179            return 0;
1180        }
1181        trace_savevm_section_start(se->idstr, se->section_id);
1182
1183        save_section_header(f, se, QEMU_VM_SECTION_PART);
1184
1185        ret = se->ops->save_live_iterate(f, se->opaque);
1186        trace_savevm_section_end(se->idstr, se->section_id, ret);
1187        save_section_footer(f, se);
1188
1189        if (ret < 0) {
1190            qemu_file_set_error(f, ret);
1191        }
1192        if (ret <= 0) {
1193            /* Do not proceed to the next vmstate before this one reported
1194               completion of the current stage. This serializes the migration
1195               and reduces the probability that a faster changing state is
1196               synchronized over and over again. */
1197            break;
1198        }
1199    }
1200    return ret;
1201}
1202
1203static bool should_send_vmdesc(void)
1204{
1205    MachineState *machine = MACHINE(qdev_get_machine());
1206    bool in_postcopy = migration_in_postcopy();
1207    return !machine->suppress_vmdesc && !in_postcopy;
1208}
1209
1210/*
1211 * Calls the save_live_complete_postcopy methods
1212 * causing the last few pages to be sent immediately and doing any associated
1213 * cleanup.
1214 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
1215 * all the other devices, but that happens at the point we switch to postcopy.
1216 */
1217void qemu_savevm_state_complete_postcopy(QEMUFile *f)
1218{
1219    SaveStateEntry *se;
1220    int ret;
1221
1222    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1223        if (!se->ops || !se->ops->save_live_complete_postcopy) {
1224            continue;
1225        }
1226        if (se->ops && se->ops->is_active) {
1227            if (!se->ops->is_active(se->opaque)) {
1228                continue;
1229            }
1230        }
1231        trace_savevm_section_start(se->idstr, se->section_id);
1232        /* Section type */
1233        qemu_put_byte(f, QEMU_VM_SECTION_END);
1234        qemu_put_be32(f, se->section_id);
1235
1236        ret = se->ops->save_live_complete_postcopy(f, se->opaque);
1237        trace_savevm_section_end(se->idstr, se->section_id, ret);
1238        save_section_footer(f, se);
1239        if (ret < 0) {
1240            qemu_file_set_error(f, ret);
1241            return;
1242        }
1243    }
1244
1245    qemu_put_byte(f, QEMU_VM_EOF);
1246    qemu_fflush(f);
1247}
1248
1249int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
1250                                       bool inactivate_disks)
1251{
1252    QJSON *vmdesc;
1253    int vmdesc_len;
1254    SaveStateEntry *se;
1255    int ret;
1256    bool in_postcopy = migration_in_postcopy();
1257    Error *local_err = NULL;
1258
1259    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
1260        error_report_err(local_err);
1261    }
1262
1263    trace_savevm_state_complete_precopy();
1264
1265    cpu_synchronize_all_states();
1266
1267    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1268        if (!se->ops ||
1269            (in_postcopy && se->ops->has_postcopy &&
1270             se->ops->has_postcopy(se->opaque)) ||
1271            (in_postcopy && !iterable_only) ||
1272            !se->ops->save_live_complete_precopy) {
1273            continue;
1274        }
1275
1276        if (se->ops && se->ops->is_active) {
1277            if (!se->ops->is_active(se->opaque)) {
1278                continue;
1279            }
1280        }
1281        trace_savevm_section_start(se->idstr, se->section_id);
1282
1283        save_section_header(f, se, QEMU_VM_SECTION_END);
1284
1285        ret = se->ops->save_live_complete_precopy(f, se->opaque);
1286        trace_savevm_section_end(se->idstr, se->section_id, ret);
1287        save_section_footer(f, se);
1288        if (ret < 0) {
1289            qemu_file_set_error(f, ret);
1290            return -1;
1291        }
1292    }
1293
1294    if (iterable_only) {
1295        return 0;
1296    }
1297
1298    vmdesc = qjson_new();
1299    json_prop_int(vmdesc, "page_size", qemu_target_page_size());
1300    json_start_array(vmdesc, "devices");
1301    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1302
1303        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1304            continue;
1305        }
1306        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1307            trace_savevm_section_skip(se->idstr, se->section_id);
1308            continue;
1309        }
1310
1311        trace_savevm_section_start(se->idstr, se->section_id);
1312
1313        json_start_object(vmdesc, NULL);
1314        json_prop_str(vmdesc, "name", se->idstr);
1315        json_prop_int(vmdesc, "instance_id", se->instance_id);
1316
1317        save_section_header(f, se, QEMU_VM_SECTION_FULL);
1318        ret = vmstate_save(f, se, vmdesc);
1319        if (ret) {
1320            qemu_file_set_error(f, ret);
1321            return ret;
1322        }
1323        trace_savevm_section_end(se->idstr, se->section_id, 0);
1324        save_section_footer(f, se);
1325
1326        json_end_object(vmdesc);
1327    }
1328
1329    if (inactivate_disks) {
1330        /* Inactivate before sending QEMU_VM_EOF so that the
1331         * bdrv_invalidate_cache_all() on the other end won't fail. */
1332        ret = bdrv_inactivate_all();
1333        if (ret) {
1334            error_report("%s: bdrv_inactivate_all() failed (%d)",
1335                         __func__, ret);
1336            qemu_file_set_error(f, ret);
1337            return ret;
1338        }
1339    }
1340    if (!in_postcopy) {
1341        /* Postcopy stream will still be going */
1342        qemu_put_byte(f, QEMU_VM_EOF);
1343    }
1344
1345    json_end_array(vmdesc);
1346    qjson_finish(vmdesc);
1347    vmdesc_len = strlen(qjson_get_str(vmdesc));
1348
1349    if (should_send_vmdesc()) {
1350        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
1351        qemu_put_be32(f, vmdesc_len);
1352        qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
1353    }
1354    qjson_destroy(vmdesc);
1355
1356    qemu_fflush(f);
1357    return 0;
1358}
1359
1360/* Give an estimate of the amount left to be transferred,
1361 * the result is split into the amount for units that can and
1362 * for units that can't do postcopy.
1363 */
1364void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
1365                               uint64_t *res_precopy_only,
1366                               uint64_t *res_compatible,
1367                               uint64_t *res_postcopy_only)
1368{
1369    SaveStateEntry *se;
1370
1371    *res_precopy_only = 0;
1372    *res_compatible = 0;
1373    *res_postcopy_only = 0;
1374
1375
1376    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1377        if (!se->ops || !se->ops->save_live_pending) {
1378            continue;
1379        }
1380        if (se->ops && se->ops->is_active) {
1381            if (!se->ops->is_active(se->opaque)) {
1382                continue;
1383            }
1384        }
1385        se->ops->save_live_pending(f, se->opaque, threshold_size,
1386                                   res_precopy_only, res_compatible,
1387                                   res_postcopy_only);
1388    }
1389}
1390
1391void qemu_savevm_state_cleanup(void)
1392{
1393    SaveStateEntry *se;
1394    Error *local_err = NULL;
1395
1396    if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
1397        error_report_err(local_err);
1398    }
1399
1400    trace_savevm_state_cleanup();
1401    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1402        if (se->ops && se->ops->save_cleanup) {
1403            se->ops->save_cleanup(se->opaque);
1404        }
1405    }
1406}
1407
1408static int qemu_savevm_state(QEMUFile *f, Error **errp)
1409{
1410    int ret;
1411    MigrationState *ms = migrate_get_current();
1412    MigrationStatus status;
1413
1414    if (migration_is_setup_or_active(ms->state) ||
1415        ms->state == MIGRATION_STATUS_CANCELLING ||
1416        ms->state == MIGRATION_STATUS_COLO) {
1417        error_setg(errp, QERR_MIGRATION_ACTIVE);
1418        return -EINVAL;
1419    }
1420
1421    if (migrate_use_block()) {
1422        error_setg(errp, "Block migration and snapshots are incompatible");
1423        return -EINVAL;
1424    }
1425
1426    migrate_init(ms);
1427    ms->to_dst_file = f;
1428
1429    qemu_mutex_unlock_iothread();
1430    qemu_savevm_state_header(f);
1431    qemu_savevm_state_setup(f);
1432    qemu_mutex_lock_iothread();
1433
1434    while (qemu_file_get_error(f) == 0) {
1435        if (qemu_savevm_state_iterate(f, false) > 0) {
1436            break;
1437        }
1438    }
1439
1440    ret = qemu_file_get_error(f);
1441    if (ret == 0) {
1442        qemu_savevm_state_complete_precopy(f, false, false);
1443        ret = qemu_file_get_error(f);
1444    }
1445    qemu_savevm_state_cleanup();
1446    if (ret != 0) {
1447        error_setg_errno(errp, -ret, "Error while writing VM state");
1448    }
1449
1450    if (ret != 0) {
1451        status = MIGRATION_STATUS_FAILED;
1452    } else {
1453        status = MIGRATION_STATUS_COMPLETED;
1454    }
1455    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
1456
1457    /* f is outer parameter, it should not stay in global migration state after
1458     * this function finished */
1459    ms->to_dst_file = NULL;
1460
1461    return ret;
1462}
1463
1464void qemu_savevm_live_state(QEMUFile *f)
1465{
1466    /* save QEMU_VM_SECTION_END section */
1467    qemu_savevm_state_complete_precopy(f, true, false);
1468    qemu_put_byte(f, QEMU_VM_EOF);
1469}
1470
1471int qemu_save_device_state(QEMUFile *f)
1472{
1473    SaveStateEntry *se;
1474
1475    if (!migration_in_colo_state()) {
1476        qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1477        qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1478    }
1479    cpu_synchronize_all_states();
1480
1481    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1482        int ret;
1483
1484        if (se->is_ram) {
1485            continue;
1486        }
1487        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1488            continue;
1489        }
1490        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1491            continue;
1492        }
1493
1494        save_section_header(f, se, QEMU_VM_SECTION_FULL);
1495
1496        ret = vmstate_save(f, se, NULL);
1497        if (ret) {
1498            return ret;
1499        }
1500
1501        save_section_footer(f, se);
1502    }
1503
1504    qemu_put_byte(f, QEMU_VM_EOF);
1505
1506    return qemu_file_get_error(f);
1507}
1508
1509static SaveStateEntry *find_se(const char *idstr, int instance_id)
1510{
1511    SaveStateEntry *se;
1512
1513    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1514        if (!strcmp(se->idstr, idstr) &&
1515            (instance_id == se->instance_id ||
1516             instance_id == se->alias_id))
1517            return se;
1518        /* Migrating from an older version? */
1519        if (strstr(se->idstr, idstr) && se->compat) {
1520            if (!strcmp(se->compat->idstr, idstr) &&
1521                (instance_id == se->compat->instance_id ||
1522                 instance_id == se->alias_id))
1523                return se;
1524        }
1525    }
1526    return NULL;
1527}
1528
/* Special positive return codes of the loadvm command handlers */
enum LoadVMExitCodes {
    /* Lets a command make every layer of nested loadvm loops quit */
    LOADVM_QUIT = 1,
};
1533
1534/* ------ incoming postcopy messages ------ */
1535/* 'advise' arrives before any transfers just to tell us that a postcopy
1536 * *might* happen - it might be skipped if precopy transferred everything
1537 * quickly.
1538 */
1539static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
1540                                         uint16_t len)
1541{
1542    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1543    uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
1544    Error *local_err = NULL;
1545
1546    trace_loadvm_postcopy_handle_advise();
1547    if (ps != POSTCOPY_INCOMING_NONE) {
1548        error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
1549        return -1;
1550    }
1551
1552    switch (len) {
1553    case 0:
1554        if (migrate_postcopy_ram()) {
1555            error_report("RAM postcopy is enabled but have 0 byte advise");
1556            return -EINVAL;
1557        }
1558        return 0;
1559    case 8 + 8:
1560        if (!migrate_postcopy_ram()) {
1561            error_report("RAM postcopy is disabled but have 16 byte advise");
1562            return -EINVAL;
1563        }
1564        break;
1565    default:
1566        error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
1567        return -EINVAL;
1568    }
1569
1570    if (!postcopy_ram_supported_by_host(mis)) {
1571        postcopy_state_set(POSTCOPY_INCOMING_NONE);
1572        return -1;
1573    }
1574
1575    remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
1576    local_pagesize_summary = ram_pagesize_summary();
1577
1578    if (remote_pagesize_summary != local_pagesize_summary)  {
1579        /*
1580         * This detects two potential causes of mismatch:
1581         *   a) A mismatch in host page sizes
1582         *      Some combinations of mismatch are probably possible but it gets
1583         *      a bit more complicated.  In particular we need to place whole
1584         *      host pages on the dest at once, and we need to ensure that we
1585         *      handle dirtying to make sure we never end up sending part of
1586         *      a hostpage on it's own.
1587         *   b) The use of different huge page sizes on source/destination
1588         *      a more fine grain test is performed during RAM block migration
1589         *      but this test here causes a nice early clear failure, and
1590         *      also fails when passed to an older qemu that doesn't
1591         *      do huge pages.
1592         */
1593        error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
1594                                                             " d=%" PRIx64 ")",
1595                     remote_pagesize_summary, local_pagesize_summary);
1596        return -1;
1597    }
1598
1599    remote_tps = qemu_get_be64(mis->from_src_file);
1600    if (remote_tps != qemu_target_page_size()) {
1601        /*
1602         * Again, some differences could be dealt with, but for now keep it
1603         * simple.
1604         */
1605        error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
1606                     (int)remote_tps, qemu_target_page_size());
1607        return -1;
1608    }
1609
1610    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
1611        error_report_err(local_err);
1612        return -1;
1613    }
1614
1615    if (ram_postcopy_incoming_init(mis)) {
1616        return -1;
1617    }
1618
1619    postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1620
1621    return 0;
1622}
1623
1624/* After postcopy we will be told to throw some pages away since they're
1625 * dirty and will have to be demand fetched.  Must happen before CPU is
1626 * started.
1627 * There can be 0..many of these messages, each encoding multiple pages.
1628 */
1629static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
1630                                              uint16_t len)
1631{
1632    int tmp;
1633    char ramid[256];
1634    PostcopyState ps = postcopy_state_get();
1635
1636    trace_loadvm_postcopy_ram_handle_discard();
1637
1638    switch (ps) {
1639    case POSTCOPY_INCOMING_ADVISE:
1640        /* 1st discard */
1641        tmp = postcopy_ram_prepare_discard(mis);
1642        if (tmp) {
1643            return tmp;
1644        }
1645        break;
1646
1647    case POSTCOPY_INCOMING_DISCARD:
1648        /* Expected state */
1649        break;
1650
1651    default:
1652        error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
1653                     ps);
1654        return -1;
1655    }
1656    /* We're expecting a
1657     *    Version (0)
1658     *    a RAM ID string (length byte, name, 0 term)
1659     *    then at least 1 16 byte chunk
1660    */
1661    if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
1662        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1663        return -1;
1664    }
1665
1666    tmp = qemu_get_byte(mis->from_src_file);
1667    if (tmp != postcopy_ram_discard_version) {
1668        error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
1669        return -1;
1670    }
1671
1672    if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
1673        error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
1674        return -1;
1675    }
1676    tmp = qemu_get_byte(mis->from_src_file);
1677    if (tmp != 0) {
1678        error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
1679        return -1;
1680    }
1681
1682    len -= 3 + strlen(ramid);
1683    if (len % 16) {
1684        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1685        return -1;
1686    }
1687    trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
1688    while (len) {
1689        uint64_t start_addr, block_length;
1690        start_addr = qemu_get_be64(mis->from_src_file);
1691        block_length = qemu_get_be64(mis->from_src_file);
1692
1693        len -= 16;
1694        int ret = ram_discard_range(ramid, start_addr, block_length);
1695        if (ret) {
1696            return ret;
1697        }
1698    }
1699    trace_loadvm_postcopy_ram_handle_discard_end();
1700
1701    return 0;
1702}
1703
1704/*
1705 * Triggered by a postcopy_listen command; this thread takes over reading
1706 * the input stream, leaving the main thread free to carry on loading the rest
1707 * of the device state (from RAM).
1708 * (TODO:This could do with being in a postcopy file - but there again it's
1709 * just another input loop, not that postcopy specific)
1710 */
1711static void *postcopy_ram_listen_thread(void *opaque)
1712{
1713    MigrationIncomingState *mis = migration_incoming_get_current();
1714    QEMUFile *f = mis->from_src_file;
1715    int load_res;
1716
1717    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
1718                                   MIGRATION_STATUS_POSTCOPY_ACTIVE);
1719    qemu_sem_post(&mis->listen_thread_sem);
1720    trace_postcopy_ram_listen_thread_start();
1721
1722    rcu_register_thread();
1723    /*
1724     * Because we're a thread and not a coroutine we can't yield
1725     * in qemu_file, and thus we must be blocking now.
1726     */
1727    qemu_file_set_blocking(f, true);
1728    load_res = qemu_loadvm_state_main(f, mis);
1729
1730    /*
1731     * This is tricky, but, mis->from_src_file can change after it
1732     * returns, when postcopy recovery happened. In the future, we may
1733     * want a wrapper for the QEMUFile handle.
1734     */
1735    f = mis->from_src_file;
1736
1737    /* And non-blocking again so we don't block in any cleanup */
1738    qemu_file_set_blocking(f, false);
1739
1740    trace_postcopy_ram_listen_thread_exit();
1741    if (load_res < 0) {
1742        error_report("%s: loadvm failed: %d", __func__, load_res);
1743        qemu_file_set_error(f, load_res);
1744        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1745                                       MIGRATION_STATUS_FAILED);
1746    } else {
1747        /*
1748         * This looks good, but it's possible that the device loading in the
1749         * main thread hasn't finished yet, and so we might not be in 'RUN'
1750         * state yet; wait for the end of the main thread.
1751         */
1752        qemu_event_wait(&mis->main_thread_load_event);
1753    }
1754    postcopy_ram_incoming_cleanup(mis);
1755
1756    if (load_res < 0) {
1757        /*
1758         * If something went wrong then we have a bad state so exit;
1759         * depending how far we got it might be possible at this point
1760         * to leave the guest running and fire MCEs for pages that never
1761         * arrived as a desperate recovery step.
1762         */
1763        rcu_unregister_thread();
1764        exit(EXIT_FAILURE);
1765    }
1766
1767    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1768                                   MIGRATION_STATUS_COMPLETED);
1769    /*
1770     * If everything has worked fine, then the main thread has waited
1771     * for us to start, and we're the last use of the mis.
1772     * (If something broke then qemu will have to exit anyway since it's
1773     * got a bad migration state).
1774     */
1775    migration_incoming_state_destroy();
1776    qemu_loadvm_state_cleanup();
1777
1778    rcu_unregister_thread();
1779    mis->have_listen_thread = false;
1780    return NULL;
1781}
1782
1783/* After this message we must be able to immediately receive postcopy data */
1784static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
1785{
1786    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
1787    trace_loadvm_postcopy_handle_listen();
1788    Error *local_err = NULL;
1789
1790    if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
1791        error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
1792        return -1;
1793    }
1794    if (ps == POSTCOPY_INCOMING_ADVISE) {
1795        /*
1796         * A rare case, we entered listen without having to do any discards,
1797         * so do the setup that's normally done at the time of the 1st discard.
1798         */
1799        if (migrate_postcopy_ram()) {
1800            postcopy_ram_prepare_discard(mis);
1801        }
1802    }
1803
1804    /*
1805     * Sensitise RAM - can now generate requests for blocks that don't exist
1806     * However, at this point the CPU shouldn't be running, and the IO
1807     * shouldn't be doing anything yet so don't actually expect requests
1808     */
1809    if (migrate_postcopy_ram()) {
1810        if (postcopy_ram_enable_notify(mis)) {
1811            postcopy_ram_incoming_cleanup(mis);
1812            return -1;
1813        }
1814    }
1815
1816    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
1817        error_report_err(local_err);
1818        return -1;
1819    }
1820
1821    if (mis->have_listen_thread) {
1822        error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
1823        return -1;
1824    }
1825
1826    mis->have_listen_thread = true;
1827    /* Start up the listening thread and wait for it to signal ready */
1828    qemu_sem_init(&mis->listen_thread_sem, 0);
1829    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
1830                       postcopy_ram_listen_thread, NULL,
1831                       QEMU_THREAD_DETACHED);
1832    qemu_sem_wait(&mis->listen_thread_sem);
1833    qemu_sem_destroy(&mis->listen_thread_sem);
1834
1835    return 0;
1836}
1837
1838
1839typedef struct {
1840    QEMUBH *bh;
1841} HandleRunBhData;
1842
1843static void loadvm_postcopy_handle_run_bh(void *opaque)
1844{
1845    Error *local_err = NULL;
1846    HandleRunBhData *data = opaque;
1847    MigrationIncomingState *mis = migration_incoming_get_current();
1848
1849    /* TODO we should move all of this lot into postcopy_ram.c or a shared code
1850     * in migration.c
1851     */
1852    cpu_synchronize_all_post_init();
1853
1854    qemu_announce_self(&mis->announce_timer, migrate_announce_params());
1855
1856    /* Make sure all file formats flush their mutable metadata.
1857     * If we get an error here, just don't restart the VM yet. */
1858    bdrv_invalidate_cache_all(&local_err);
1859    if (local_err) {
1860        error_report_err(local_err);
1861        local_err = NULL;
1862        autostart = false;
1863    }
1864
1865    trace_loadvm_postcopy_handle_run_cpu_sync();
1866
1867    trace_loadvm_postcopy_handle_run_vmstart();
1868
1869    dirty_bitmap_mig_before_vm_start();
1870
1871    if (autostart) {
1872        /* Hold onto your hats, starting the CPU */
1873        vm_start();
1874    } else {
1875        /* leave it paused and let management decide when to start the CPU */
1876        runstate_set(RUN_STATE_PAUSED);
1877    }
1878
1879    qemu_bh_delete(data->bh);
1880    g_free(data);
1881}
1882
1883/* After all discards we can start running and asking for pages */
1884static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
1885{
1886    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
1887    HandleRunBhData *data;
1888
1889    trace_loadvm_postcopy_handle_run();
1890    if (ps != POSTCOPY_INCOMING_LISTENING) {
1891        error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
1892        return -1;
1893    }
1894
1895    data = g_new(HandleRunBhData, 1);
1896    data->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, data);
1897    qemu_bh_schedule(data->bh);
1898
1899    /* We need to finish reading the stream from the package
1900     * and also stop reading anything more from the stream that loaded the
1901     * package (since it's now being read by the listener thread).
1902     * LOADVM_QUIT will quit all the layers of nested loadvm loops.
1903     */
1904    return LOADVM_QUIT;
1905}
1906
1907static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
1908{
1909    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
1910        error_report("%s: illegal resume received", __func__);
1911        /* Don't fail the load, only for this. */
1912        return 0;
1913    }
1914
1915    /*
1916     * This means source VM is ready to resume the postcopy migration.
1917     * It's time to switch state and release the fault thread to
1918     * continue service page faults.
1919     */
1920    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
1921                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
1922    qemu_sem_post(&mis->postcopy_pause_sem_fault);
1923
1924    trace_loadvm_postcopy_handle_resume();
1925
1926    /* Tell source that "we are ready" */
1927    migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);
1928
1929    return 0;
1930}
1931
1932/**
1933 * Immediately following this command is a blob of data containing an embedded
1934 * chunk of migration stream; read it and load it.
1935 *
1936 * @mis: Incoming state
1937 * @length: Length of packaged data to read
1938 *
1939 * Returns: Negative values on error
1940 *
1941 */
1942static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
1943{
1944    int ret;
1945    size_t length;
1946    QIOChannelBuffer *bioc;
1947
1948    length = qemu_get_be32(mis->from_src_file);
1949    trace_loadvm_handle_cmd_packaged(length);
1950
1951    if (length > MAX_VM_CMD_PACKAGED_SIZE) {
1952        error_report("Unreasonably large packaged state: %zu", length);
1953        return -1;
1954    }
1955
1956    bioc = qio_channel_buffer_new(length);
1957    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
1958    ret = qemu_get_buffer(mis->from_src_file,
1959                          bioc->data,
1960                          length);
1961    if (ret != length) {
1962        object_unref(OBJECT(bioc));
1963        error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
1964                     ret, length);
1965        return (ret < 0) ? ret : -EAGAIN;
1966    }
1967    bioc->usage += length;
1968    trace_loadvm_handle_cmd_packaged_received(ret);
1969
1970    QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
1971
1972    ret = qemu_loadvm_state_main(packf, mis);
1973    trace_loadvm_handle_cmd_packaged_main(ret);
1974    qemu_fclose(packf);
1975    object_unref(OBJECT(bioc));
1976
1977    return ret;
1978}
1979
1980/*
1981 * Handle request that source requests for recved_bitmap on
1982 * destination. Payload format:
1983 *
1984 * len (1 byte) + ramblock_name (<255 bytes)
1985 */
1986static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
1987                                     uint16_t len)
1988{
1989    QEMUFile *file = mis->from_src_file;
1990    RAMBlock *rb;
1991    char block_name[256];
1992    size_t cnt;
1993
1994    cnt = qemu_get_counted_string(file, block_name);
1995    if (!cnt) {
1996        error_report("%s: failed to read block name", __func__);
1997        return -EINVAL;
1998    }
1999
2000    /* Validate before using the data */
2001    if (qemu_file_get_error(file)) {
2002        return qemu_file_get_error(file);
2003    }
2004
2005    if (len != cnt + 1) {
2006        error_report("%s: invalid payload length (%d)", __func__, len);
2007        return -EINVAL;
2008    }
2009
2010    rb = qemu_ram_block_by_name(block_name);
2011    if (!rb) {
2012        error_report("%s: block '%s' not found", __func__, block_name);
2013        return -EINVAL;
2014    }
2015
2016    migrate_send_rp_recv_bitmap(mis, block_name);
2017
2018    trace_loadvm_handle_recv_bitmap(block_name);
2019
2020    return 0;
2021}
2022
2023static int loadvm_process_enable_colo(MigrationIncomingState *mis)
2024{
2025    migration_incoming_enable_colo();
2026    return colo_init_ram_cache();
2027}
2028
2029/*
2030 * Process an incoming 'QEMU_VM_COMMAND'
2031 * 0           just a normal return
2032 * LOADVM_QUIT All good, but exit the loop
2033 * <0          Error
2034 */
2035static int loadvm_process_command(QEMUFile *f)
2036{
2037    MigrationIncomingState *mis = migration_incoming_get_current();
2038    uint16_t cmd;
2039    uint16_t len;
2040    uint32_t tmp32;
2041
2042    cmd = qemu_get_be16(f);
2043    len = qemu_get_be16(f);
2044
2045    /* Check validity before continue processing of cmds */
2046    if (qemu_file_get_error(f)) {
2047        return qemu_file_get_error(f);
2048    }
2049
2050    trace_loadvm_process_command(cmd, len);
2051    if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
2052        error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
2053        return -EINVAL;
2054    }
2055
2056    if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
2057        error_report("%s received with bad length - expecting %zu, got %d",
2058                     mig_cmd_args[cmd].name,
2059                     (size_t)mig_cmd_args[cmd].len, len);
2060        return -ERANGE;
2061    }
2062
2063    switch (cmd) {
2064    case MIG_CMD_OPEN_RETURN_PATH:
2065        if (mis->to_src_file) {
2066            error_report("CMD_OPEN_RETURN_PATH called when RP already open");
2067            /* Not really a problem, so don't give up */
2068            return 0;
2069        }
2070        mis->to_src_file = qemu_file_get_return_path(f);
2071        if (!mis->to_src_file) {
2072            error_report("CMD_OPEN_RETURN_PATH failed");
2073            return -1;
2074        }
2075        break;
2076
2077    case MIG_CMD_PING:
2078        tmp32 = qemu_get_be32(f);
2079        trace_loadvm_process_command_ping(tmp32);
2080        if (!mis->to_src_file) {
2081            error_report("CMD_PING (0x%x) received with no return path",
2082                         tmp32);
2083            return -1;
2084        }
2085        migrate_send_rp_pong(mis, tmp32);
2086        break;
2087
2088    case MIG_CMD_PACKAGED:
2089        return loadvm_handle_cmd_packaged(mis);
2090
2091    case MIG_CMD_POSTCOPY_ADVISE:
2092        return loadvm_postcopy_handle_advise(mis, len);
2093
2094    case MIG_CMD_POSTCOPY_LISTEN:
2095        return loadvm_postcopy_handle_listen(mis);
2096
2097    case MIG_CMD_POSTCOPY_RUN:
2098        return loadvm_postcopy_handle_run(mis);
2099
2100    case MIG_CMD_POSTCOPY_RAM_DISCARD:
2101        return loadvm_postcopy_ram_handle_discard(mis, len);
2102
2103    case MIG_CMD_POSTCOPY_RESUME:
2104        return loadvm_postcopy_handle_resume(mis);
2105
2106    case MIG_CMD_RECV_BITMAP:
2107        return loadvm_handle_recv_bitmap(mis, len);
2108
2109    case MIG_CMD_ENABLE_COLO:
2110        return loadvm_process_enable_colo(mis);
2111    }
2112
2113    return 0;
2114}
2115
2116/*
2117 * Read a footer off the wire and check that it matches the expected section
2118 *
2119 * Returns: true if the footer was good
2120 *          false if there is a problem (and calls error_report to say why)
2121 */
2122static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
2123{
2124    int ret;
2125    uint8_t read_mark;
2126    uint32_t read_section_id;
2127
2128    if (!migrate_get_current()->send_section_footer) {
2129        /* No footer to check */
2130        return true;
2131    }
2132
2133    read_mark = qemu_get_byte(f);
2134
2135    ret = qemu_file_get_error(f);
2136    if (ret) {
2137        error_report("%s: Read section footer failed: %d",
2138                     __func__, ret);
2139        return false;
2140    }
2141
2142    if (read_mark != QEMU_VM_SECTION_FOOTER) {
2143        error_report("Missing section footer for %s", se->idstr);
2144        return false;
2145    }
2146
2147    read_section_id = qemu_get_be32(f);
2148    if (read_section_id != se->load_section_id) {
2149        error_report("Mismatched section id in footer for %s -"
2150                     " read 0x%x expected 0x%x",
2151                     se->idstr, read_section_id, se->load_section_id);
2152        return false;
2153    }
2154
2155    /* All good */
2156    return true;
2157}
2158
2159static int
2160qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
2161{
2162    uint32_t instance_id, version_id, section_id;
2163    SaveStateEntry *se;
2164    char idstr[256];
2165    int ret;
2166
2167    /* Read section start */
2168    section_id = qemu_get_be32(f);
2169    if (!qemu_get_counted_string(f, idstr)) {
2170        error_report("Unable to read ID string for section %u",
2171                     section_id);
2172        return -EINVAL;
2173    }
2174    instance_id = qemu_get_be32(f);
2175    version_id = qemu_get_be32(f);
2176
2177    ret = qemu_file_get_error(f);
2178    if (ret) {
2179        error_report("%s: Failed to read instance/version ID: %d",
2180                     __func__, ret);
2181        return ret;
2182    }
2183
2184    trace_qemu_loadvm_state_section_startfull(section_id, idstr,
2185            instance_id, version_id);
2186    /* Find savevm section */
2187    se = find_se(idstr, instance_id);
2188    if (se == NULL) {
2189        error_report("Unknown savevm section or instance '%s' %d. "
2190                     "Make sure that your current VM setup matches your "
2191                     "saved VM setup, including any hotplugged devices",
2192                     idstr, instance_id);
2193        return -EINVAL;
2194    }
2195
2196    /* Validate version */
2197    if (version_id > se->version_id) {
2198        error_report("savevm: unsupported version %d for '%s' v%d",
2199                     version_id, idstr, se->version_id);
2200        return -EINVAL;
2201    }
2202    se->load_version_id = version_id;
2203    se->load_section_id = section_id;
2204
2205    /* Validate if it is a device's state */
2206    if (xen_enabled() && se->is_ram) {
2207        error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
2208        return -EINVAL;
2209    }
2210
2211    ret = vmstate_load(f, se);
2212    if (ret < 0) {
2213        error_report("error while loading state for instance 0x%x of"
2214                     " device '%s'", instance_id, idstr);
2215        return ret;
2216    }
2217    if (!check_section_footer(f, se)) {
2218        return -EINVAL;
2219    }
2220
2221    return 0;
2222}
2223
2224static int
2225qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
2226{
2227    uint32_t section_id;
2228    SaveStateEntry *se;
2229    int ret;
2230
2231    section_id = qemu_get_be32(f);
2232
2233    ret = qemu_file_get_error(f);
2234    if (ret) {
2235        error_report("%s: Failed to read section ID: %d",
2236                     __func__, ret);
2237        return ret;
2238    }
2239
2240    trace_qemu_loadvm_state_section_partend(section_id);
2241    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2242        if (se->load_section_id == section_id) {
2243            break;
2244        }
2245    }
2246    if (se == NULL) {
2247        error_report("Unknown savevm section %d", section_id);
2248        return -EINVAL;
2249    }
2250
2251    ret = vmstate_load(f, se);
2252    if (ret < 0) {
2253        error_report("error while loading state section id %d(%s)",
2254                     section_id, se->idstr);
2255        return ret;
2256    }
2257    if (!check_section_footer(f, se)) {
2258        return -EINVAL;
2259    }
2260
2261    return 0;
2262}
2263
2264static int qemu_loadvm_state_header(QEMUFile *f)
2265{
2266    unsigned int v;
2267    int ret;
2268
2269    v = qemu_get_be32(f);
2270    if (v != QEMU_VM_FILE_MAGIC) {
2271        error_report("Not a migration stream");
2272        return -EINVAL;
2273    }
2274
2275    v = qemu_get_be32(f);
2276    if (v == QEMU_VM_FILE_VERSION_COMPAT) {
2277        error_report("SaveVM v2 format is obsolete and don't work anymore");
2278        return -ENOTSUP;
2279    }
2280    if (v != QEMU_VM_FILE_VERSION) {
2281        error_report("Unsupported migration stream version");
2282        return -ENOTSUP;
2283    }
2284
2285    if (migrate_get_current()->send_configuration) {
2286        if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
2287            error_report("Configuration section missing");
2288            qemu_loadvm_state_cleanup();
2289            return -EINVAL;
2290        }
2291        ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
2292
2293        if (ret) {
2294            qemu_loadvm_state_cleanup();
2295            return ret;
2296        }
2297    }
2298    return 0;
2299}
2300
2301static int qemu_loadvm_state_setup(QEMUFile *f)
2302{
2303    SaveStateEntry *se;
2304    int ret;
2305
2306    trace_loadvm_state_setup();
2307    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2308        if (!se->ops || !se->ops->load_setup) {
2309            continue;
2310        }
2311        if (se->ops && se->ops->is_active) {
2312            if (!se->ops->is_active(se->opaque)) {
2313                continue;
2314            }
2315        }
2316
2317        ret = se->ops->load_setup(f, se->opaque);
2318        if (ret < 0) {
2319            qemu_file_set_error(f, ret);
2320            error_report("Load state of device %s failed", se->idstr);
2321            return ret;
2322        }
2323    }
2324    return 0;
2325}
2326
2327void qemu_loadvm_state_cleanup(void)
2328{
2329    SaveStateEntry *se;
2330
2331    trace_loadvm_state_cleanup();
2332    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2333        if (se->ops && se->ops->load_cleanup) {
2334            se->ops->load_cleanup(se->opaque);
2335        }
2336    }
2337}
2338
2339/* Return true if we should continue the migration, or false. */
2340static bool postcopy_pause_incoming(MigrationIncomingState *mis)
2341{
2342    trace_postcopy_pause_incoming();
2343
2344    /* Clear the triggered bit to allow one recovery */
2345    mis->postcopy_recover_triggered = false;
2346
2347    assert(mis->from_src_file);
2348    qemu_file_shutdown(mis->from_src_file);
2349    qemu_fclose(mis->from_src_file);
2350    mis->from_src_file = NULL;
2351
2352    assert(mis->to_src_file);
2353    qemu_file_shutdown(mis->to_src_file);
2354    qemu_mutex_lock(&mis->rp_mutex);
2355    qemu_fclose(mis->to_src_file);
2356    mis->to_src_file = NULL;
2357    qemu_mutex_unlock(&mis->rp_mutex);
2358
2359    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2360                      MIGRATION_STATUS_POSTCOPY_PAUSED);
2361
2362    /* Notify the fault thread for the invalidated file handle */
2363    postcopy_fault_thread_notify(mis);
2364
2365    error_report("Detected IO failure for postcopy. "
2366                 "Migration paused.");
2367
2368    while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
2369        qemu_sem_wait(&mis->postcopy_pause_sem_dst);
2370    }
2371
2372    trace_postcopy_pause_incoming_continued();
2373
2374    return true;
2375}
2376
2377int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
2378{
2379    uint8_t section_type;
2380    int ret = 0;
2381
2382retry:
2383    while (true) {
2384        section_type = qemu_get_byte(f);
2385
2386        if (qemu_file_get_error(f)) {
2387            ret = qemu_file_get_error(f);
2388            break;
2389        }
2390
2391        trace_qemu_loadvm_state_section(section_type);
2392        switch (section_type) {
2393        case QEMU_VM_SECTION_START:
2394        case QEMU_VM_SECTION_FULL:
2395            ret = qemu_loadvm_section_start_full(f, mis);
2396            if (ret < 0) {
2397                goto out;
2398            }
2399            break;
2400        case QEMU_VM_SECTION_PART:
2401        case QEMU_VM_SECTION_END:
2402            ret = qemu_loadvm_section_part_end(f, mis);
2403            if (ret < 0) {
2404                goto out;
2405            }
2406            break;
2407        case QEMU_VM_COMMAND:
2408            ret = loadvm_process_command(f);
2409            trace_qemu_loadvm_state_section_command(ret);
2410            if ((ret < 0) || (ret & LOADVM_QUIT)) {
2411                goto out;
2412            }
2413            break;
2414        case QEMU_VM_EOF:
2415            /* This is the end of migration */
2416            goto out;
2417        default:
2418            error_report("Unknown savevm section type %d", section_type);
2419            ret = -EINVAL;
2420            goto out;
2421        }
2422    }
2423
2424out:
2425    if (ret < 0) {
2426        qemu_file_set_error(f, ret);
2427
2428        /*
2429         * If we are during an active postcopy, then we pause instead
2430         * of bail out to at least keep the VM's dirty data.  Note
2431         * that POSTCOPY_INCOMING_LISTENING stage is still not enough,
2432         * during which we're still receiving device states and we
2433         * still haven't yet started the VM on destination.
2434         */
2435        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
2436            postcopy_pause_incoming(mis)) {
2437            /* Reset f to point to the newly created channel */
2438            f = mis->from_src_file;
2439            goto retry;
2440        }
2441    }
2442    return ret;
2443}
2444
2445int qemu_loadvm_state(QEMUFile *f)
2446{
2447    MigrationIncomingState *mis = migration_incoming_get_current();
2448    Error *local_err = NULL;
2449    int ret;
2450
2451    if (qemu_savevm_state_blocked(&local_err)) {
2452        error_report_err(local_err);
2453        return -EINVAL;
2454    }
2455
2456    ret = qemu_loadvm_state_header(f);
2457    if (ret) {
2458        return ret;
2459    }
2460
2461    if (qemu_loadvm_state_setup(f) != 0) {
2462        return -EINVAL;
2463    }
2464
2465    cpu_synchronize_all_pre_loadvm();
2466
2467    ret = qemu_loadvm_state_main(f, mis);
2468    qemu_event_set(&mis->main_thread_load_event);
2469
2470    trace_qemu_loadvm_state_post_main(ret);
2471
2472    if (mis->have_listen_thread) {
2473        /* Listen thread still going, can't clean up yet */
2474        return ret;
2475    }
2476
2477    if (ret == 0) {
2478        ret = qemu_file_get_error(f);
2479    }
2480
2481    /*
2482     * Try to read in the VMDESC section as well, so that dumping tools that
2483     * intercept our migration stream have the chance to see it.
2484     */
2485
2486    /* We've got to be careful; if we don't read the data and just shut the fd
2487     * then the sender can error if we close while it's still sending.
2488     * We also mustn't read data that isn't there; some transports (RDMA)
2489     * will stall waiting for that data when the source has already closed.
2490     */
2491    if (ret == 0 && should_send_vmdesc()) {
2492        uint8_t *buf;
2493        uint32_t size;
2494        uint8_t  section_type = qemu_get_byte(f);
2495
2496        if (section_type != QEMU_VM_VMDESCRIPTION) {
2497            error_report("Expected vmdescription section, but got %d",
2498                         section_type);
2499            /*
2500             * It doesn't seem worth failing at this point since
2501             * we apparently have an otherwise valid VM state
2502             */
2503        } else {
2504            buf = g_malloc(0x1000);
2505            size = qemu_get_be32(f);
2506
2507            while (size > 0) {
2508                uint32_t read_chunk = MIN(size, 0x1000);
2509                qemu_get_buffer(f, buf, read_chunk);
2510                size -= read_chunk;
2511            }
2512            g_free(buf);
2513        }
2514    }
2515
2516    qemu_loadvm_state_cleanup();
2517    cpu_synchronize_all_post_init();
2518
2519    return ret;
2520}
2521
2522int qemu_load_device_state(QEMUFile *f)
2523{
2524    MigrationIncomingState *mis = migration_incoming_get_current();
2525    int ret;
2526
2527    /* Load QEMU_VM_SECTION_FULL section */
2528    ret = qemu_loadvm_state_main(f, mis);
2529    if (ret < 0) {
2530        error_report("Failed to load device state: %d", ret);
2531        return ret;
2532    }
2533
2534    cpu_synchronize_all_post_init();
2535    return 0;
2536}
2537
2538int save_snapshot(const char *name, Error **errp)
2539{
2540    BlockDriverState *bs, *bs1;
2541    QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
2542    int ret = -1;
2543    QEMUFile *f;
2544    int saved_vm_running;
2545    uint64_t vm_state_size;
2546    qemu_timeval tv;
2547    struct tm tm;
2548    AioContext *aio_context;
2549
2550    if (migration_is_blocked(errp)) {
2551        return ret;
2552    }
2553
2554    if (!replay_can_snapshot()) {
2555        error_setg(errp, "Record/replay does not allow making snapshot "
2556                   "right now. Try once more later.");
2557        return ret;
2558    }
2559
2560    if (!bdrv_all_can_snapshot(&bs)) {
2561        error_setg(errp, "Device '%s' is writable but does not support "
2562                   "snapshots", bdrv_get_device_name(bs));
2563        return ret;
2564    }
2565
2566    /* Delete old snapshots of the same name */
2567    if (name) {
2568        ret = bdrv_all_delete_snapshot(name, &bs1, errp);
2569        if (ret < 0) {
2570            error_prepend(errp, "Error while deleting snapshot on device "
2571                          "'%s': ", bdrv_get_device_name(bs1));
2572            return ret;
2573        }
2574    }
2575
2576    bs = bdrv_all_find_vmstate_bs();
2577    if (bs == NULL) {
2578        error_setg(errp, "No block device can accept snapshots");
2579        return ret;
2580    }
2581    aio_context = bdrv_get_aio_context(bs);
2582
2583    saved_vm_running = runstate_is_running();
2584
2585    ret = global_state_store();
2586    if (ret) {
2587        error_setg(errp, "Error saving global state");
2588        return ret;
2589    }
2590    vm_stop(RUN_STATE_SAVE_VM);
2591
2592    bdrv_drain_all_begin();
2593
2594    aio_context_acquire(aio_context);
2595
2596    memset(sn, 0, sizeof(*sn));
2597
2598    /* fill auxiliary fields */
2599    qemu_gettimeofday(&tv);
2600    sn->date_sec = tv.tv_sec;
2601    sn->date_nsec = tv.tv_usec * 1000;
2602    sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
2603
2604    if (name) {
2605        ret = bdrv_snapshot_find(bs, old_sn, name);
2606        if (ret >= 0) {
2607            pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
2608            pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
2609        } else {
2610            pstrcpy(sn->name, sizeof(sn->name), name);
2611        }
2612    } else {
2613        /* cast below needed for OpenBSD where tv_sec is still 'long' */
2614        localtime_r((const time_t *)&tv.tv_sec, &tm);
2615        strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
2616    }
2617
2618    /* save the VM state */
2619    f = qemu_fopen_bdrv(bs, 1);
2620    if (!f) {
2621        error_setg(errp, "Could not open VM state file");
2622        goto the_end;
2623    }
2624    ret = qemu_savevm_state(f, errp);
2625    vm_state_size = qemu_ftell(f);
2626    qemu_fclose(f);
2627    if (ret < 0) {
2628        goto the_end;
2629    }
2630
2631    /* The bdrv_all_create_snapshot() call that follows acquires the AioContext
2632     * for itself.  BDRV_POLL_WHILE() does not support nested locking because
2633     * it only releases the lock once.  Therefore synchronous I/O will deadlock
2634     * unless we release the AioContext before bdrv_all_create_snapshot().
2635     */
2636    aio_context_release(aio_context);
2637    aio_context = NULL;
2638
2639    ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
2640    if (ret < 0) {
2641        error_setg(errp, "Error while creating snapshot on '%s'",
2642                   bdrv_get_device_name(bs));
2643        goto the_end;
2644    }
2645
2646    ret = 0;
2647
2648 the_end:
2649    if (aio_context) {
2650        aio_context_release(aio_context);
2651    }
2652
2653    bdrv_drain_all_end();
2654
2655    if (saved_vm_running) {
2656        vm_start();
2657    }
2658    return ret;
2659}
2660
2661void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
2662                                Error **errp)
2663{
2664    QEMUFile *f;
2665    QIOChannelFile *ioc;
2666    int saved_vm_running;
2667    int ret;
2668
2669    if (!has_live) {
2670        /* live default to true so old version of Xen tool stack can have a
2671         * successfull live migration */
2672        live = true;
2673    }
2674
2675    saved_vm_running = runstate_is_running();
2676    vm_stop(RUN_STATE_SAVE_VM);
2677    global_state_store_running();
2678
2679    ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT, 0660, errp);
2680    if (!ioc) {
2681        goto the_end;
2682    }
2683    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
2684    f = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
2685    object_unref(OBJECT(ioc));
2686    ret = qemu_save_device_state(f);
2687    if (ret < 0 || qemu_fclose(f) < 0) {
2688        error_setg(errp, QERR_IO_ERROR);
2689    } else {
2690        /* libxl calls the QMP command "stop" before calling
2691         * "xen-save-devices-state" and in case of migration failure, libxl
2692         * would call "cont".
2693         * So call bdrv_inactivate_all (release locks) here to let the other
2694         * side of the migration take controle of the images.
2695         */
2696        if (live && !saved_vm_running) {
2697            ret = bdrv_inactivate_all();
2698            if (ret) {
2699                error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
2700                           __func__, ret);
2701            }
2702        }
2703    }
2704
2705 the_end:
2706    if (saved_vm_running) {
2707        vm_start();
2708    }
2709}
2710
2711void qmp_xen_load_devices_state(const char *filename, Error **errp)
2712{
2713    QEMUFile *f;
2714    QIOChannelFile *ioc;
2715    int ret;
2716
2717    /* Guest must be paused before loading the device state; the RAM state
2718     * will already have been loaded by xc
2719     */
2720    if (runstate_is_running()) {
2721        error_setg(errp, "Cannot update device state while vm is running");
2722        return;
2723    }
2724    vm_stop(RUN_STATE_RESTORE_VM);
2725
2726    ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
2727    if (!ioc) {
2728        return;
2729    }
2730    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
2731    f = qemu_fopen_channel_input(QIO_CHANNEL(ioc));
2732    object_unref(OBJECT(ioc));
2733
2734    ret = qemu_loadvm_state(f);
2735    qemu_fclose(f);
2736    if (ret < 0) {
2737        error_setg(errp, QERR_IO_ERROR);
2738    }
2739    migration_incoming_state_destroy();
2740}
2741
2742int load_snapshot(const char *name, Error **errp)
2743{
2744    BlockDriverState *bs, *bs_vm_state;
2745    QEMUSnapshotInfo sn;
2746    QEMUFile *f;
2747    int ret;
2748    AioContext *aio_context;
2749    MigrationIncomingState *mis = migration_incoming_get_current();
2750
2751    if (!replay_can_snapshot()) {
2752        error_setg(errp, "Record/replay does not allow loading snapshot "
2753                   "right now. Try once more later.");
2754        return -EINVAL;
2755    }
2756
2757    if (!bdrv_all_can_snapshot(&bs)) {
2758        error_setg(errp,
2759                   "Device '%s' is writable but does not support snapshots",
2760                   bdrv_get_device_name(bs));
2761        return -ENOTSUP;
2762    }
2763    ret = bdrv_all_find_snapshot(name, &bs);
2764    if (ret < 0) {
2765        error_setg(errp,
2766                   "Device '%s' does not have the requested snapshot '%s'",
2767                   bdrv_get_device_name(bs), name);
2768        return ret;
2769    }
2770
2771    bs_vm_state = bdrv_all_find_vmstate_bs();
2772    if (!bs_vm_state) {
2773        error_setg(errp, "No block device supports snapshots");
2774        return -ENOTSUP;
2775    }
2776    aio_context = bdrv_get_aio_context(bs_vm_state);
2777
2778    /* Don't even try to load empty VM states */
2779    aio_context_acquire(aio_context);
2780    ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
2781    aio_context_release(aio_context);
2782    if (ret < 0) {
2783        return ret;
2784    } else if (sn.vm_state_size == 0) {
2785        error_setg(errp, "This is a disk-only snapshot. Revert to it "
2786                   " offline using qemu-img");
2787        return -EINVAL;
2788    }
2789
2790    /* Flush all IO requests so they don't interfere with the new state.  */
2791    bdrv_drain_all_begin();
2792
2793    ret = bdrv_all_goto_snapshot(name, &bs, errp);
2794    if (ret < 0) {
2795        error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
2796                      name, bdrv_get_device_name(bs));
2797        goto err_drain;
2798    }
2799
2800    /* restore the VM state */
2801    f = qemu_fopen_bdrv(bs_vm_state, 0);
2802    if (!f) {
2803        error_setg(errp, "Could not open VM state file");
2804        ret = -EINVAL;
2805        goto err_drain;
2806    }
2807
2808    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
2809    mis->from_src_file = f;
2810
2811    aio_context_acquire(aio_context);
2812    ret = qemu_loadvm_state(f);
2813    migration_incoming_state_destroy();
2814    aio_context_release(aio_context);
2815
2816    bdrv_drain_all_end();
2817
2818    if (ret < 0) {
2819        error_setg(errp, "Error %d while loading VM state", ret);
2820        return ret;
2821    }
2822
2823    return 0;
2824
2825err_drain:
2826    bdrv_drain_all_end();
2827    return ret;
2828}
2829
2830void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
2831{
2832    qemu_ram_set_idstr(mr->ram_block,
2833                       memory_region_name(mr), dev);
2834    qemu_ram_set_migratable(mr->ram_block);
2835}
2836
2837void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
2838{
2839    qemu_ram_unset_idstr(mr->ram_block);
2840    qemu_ram_unset_migratable(mr->ram_block);
2841}
2842
2843void vmstate_register_ram_global(MemoryRegion *mr)
2844{
2845    vmstate_register_ram(mr, NULL);
2846}
2847
2848bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
2849{
2850    /* check needed if --only-migratable is specified */
2851    if (!only_migratable) {
2852        return true;
2853    }
2854
2855    return !(vmsd && vmsd->unmigratable);
2856}
2857