qemu/migration/savevm.c
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2009-2015 Red Hat Inc
   6 *
   7 * Authors:
   8 *  Juan Quintela <quintela@redhat.com>
   9 *
  10 * Permission is hereby granted, free of charge, to any person obtaining a copy
  11 * of this software and associated documentation files (the "Software"), to deal
  12 * in the Software without restriction, including without limitation the rights
  13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 * copies of the Software, and to permit persons to whom the Software is
  15 * furnished to do so, subject to the following conditions:
  16 *
  17 * The above copyright notice and this permission notice shall be included in
  18 * all copies or substantial portions of the Software.
  19 *
  20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26 * THE SOFTWARE.
  27 */
  28
  29#include "qemu/osdep.h"
  30#include "hw/boards.h"
  31#include "hw/hw.h"
  32#include "hw/qdev.h"
  33#include "net/net.h"
  34#include "monitor/monitor.h"
  35#include "sysemu/sysemu.h"
  36#include "qemu/timer.h"
  37#include "audio/audio.h"
  38#include "migration/migration.h"
  39#include "migration/postcopy-ram.h"
  40#include "qapi/qmp/qerror.h"
  41#include "qemu/error-report.h"
  42#include "qemu/sockets.h"
  43#include "qemu/queue.h"
  44#include "sysemu/cpus.h"
  45#include "exec/memory.h"
  46#include "qmp-commands.h"
  47#include "trace.h"
  48#include "qemu/bitops.h"
  49#include "qemu/iov.h"
  50#include "block/snapshot.h"
  51#include "block/qapi.h"
  52#include "qemu/cutils.h"
  53
  54#ifndef ETH_P_RARP
  55#define ETH_P_RARP 0x8035
  56#endif
  57#define ARP_HTYPE_ETH 0x0001
  58#define ARP_PTYPE_IP 0x0800
  59#define ARP_OP_REQUEST_REV 0x3
  60
  61const unsigned int postcopy_ram_discard_version = 0;
  62
  63static bool skip_section_footers;
  64
  65static struct mig_cmd_args {
  66    ssize_t     len; /* -1 = variable */
  67    const char *name;
  68} mig_cmd_args[] = {
  69    [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
  70    [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
  71    [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
  72    [MIG_CMD_POSTCOPY_ADVISE]  = { .len = 16, .name = "POSTCOPY_ADVISE" },
  73    [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
  74    [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
  75    [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
  76                                   .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
  77    [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
  78    [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
  79};
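     /*
      * Every entry above travels inside a QEMU_VM_COMMAND chunk; see
      * qemu_savevm_command_send() below for the exact encoding:
      *
      *     byte      QEMU_VM_COMMAND
      *     be16      command number (enum qemu_vm_cmd)
      *     be16      length of the associated data
      *  len x byte   associated data
      */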
  80
  81static int announce_self_create(uint8_t *buf,
  82                                uint8_t *mac_addr)
  83{
  84    /* Ethernet header. */
  85    memset(buf, 0xff, 6);         /* destination MAC addr */
  86    memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
  87    *(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */
  88
  89    /* RARP header. */
  90    *(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */
  91    *(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */
  92    *(buf + 18) = 6; /* hardware addr length (ethernet) */
  93    *(buf + 19) = 4; /* protocol addr length (IPv4) */
  94    *(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */
  95    memcpy(buf + 22, mac_addr, 6); /* source hw addr */
  96    memset(buf + 28, 0x00, 4);     /* source protocol addr */
  97    memcpy(buf + 32, mac_addr, 6); /* target hw addr */
  98    memset(buf + 38, 0x00, 4);     /* target protocol addr */
  99
 100    /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
 101    memset(buf + 42, 0x00, 18);
 102
 103    return 60; /* len (FCS will be added by hardware) */
 104}
 105
 106static void qemu_announce_self_iter(NICState *nic, void *opaque)
 107{
 108    uint8_t buf[60];
 109    int len;
 110
 111    trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
 112    len = announce_self_create(buf, nic->conf->macaddr.a);
 113
 114    qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
 115}
 116
 117
 118static void qemu_announce_self_once(void *opaque)
 119{
 120    static int count = SELF_ANNOUNCE_ROUNDS;
 121    QEMUTimer *timer = *(QEMUTimer **)opaque;
 122
 123    qemu_foreach_nic(qemu_announce_self_iter, NULL);
 124
 125    if (--count) {
 126        /* delay 50ms, 150ms, 250ms, ... */
 127        timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
 128                  self_announce_delay(count));
 129    } else {
  130        timer_del(timer);
  131        timer_free(timer);
 132    }
 133}
 134
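     /*
      * Broadcast a gratuitous RARP frame for every NIC so that switches can
      * relearn the guest's MAC addresses after migration.  The first round is
      * sent immediately; qemu_announce_self_once() then re-arms the timer for
      * the remaining SELF_ANNOUNCE_ROUNDS - 1 rounds with increasing delays
      * (50ms, 150ms, 250ms, ...).
      */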
 135void qemu_announce_self(void)
 136{
 137    static QEMUTimer *timer;
 138    timer = timer_new_ms(QEMU_CLOCK_REALTIME, qemu_announce_self_once, &timer);
 139    qemu_announce_self_once(&timer);
 140}
 141
 142/***********************************************************/
 143/* savevm/loadvm support */
 144
 145static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
 146                                   int64_t pos)
 147{
 148    int ret;
 149    QEMUIOVector qiov;
 150
 151    qemu_iovec_init_external(&qiov, iov, iovcnt);
 152    ret = bdrv_writev_vmstate(opaque, &qiov, pos);
 153    if (ret < 0) {
 154        return ret;
 155    }
 156
 157    return qiov.size;
 158}
 159
 160static ssize_t block_put_buffer(void *opaque, const uint8_t *buf,
 161                                int64_t pos, size_t size)
 162{
 163    bdrv_save_vmstate(opaque, buf, pos, size);
 164    return size;
 165}
 166
 167static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
 168                                size_t size)
 169{
 170    return bdrv_load_vmstate(opaque, buf, pos, size);
 171}
 172
 173static int bdrv_fclose(void *opaque)
 174{
 175    return bdrv_flush(opaque);
 176}
 177
 178static const QEMUFileOps bdrv_read_ops = {
 179    .get_buffer = block_get_buffer,
 180    .close =      bdrv_fclose
 181};
 182
 183static const QEMUFileOps bdrv_write_ops = {
 184    .put_buffer     = block_put_buffer,
 185    .writev_buffer  = block_writev_buffer,
 186    .close          = bdrv_fclose
 187};
 188
 189static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
 190{
 191    if (is_writable) {
 192        return qemu_fopen_ops(bs, &bdrv_write_ops);
 193    }
 194    return qemu_fopen_ops(bs, &bdrv_read_ops);
 195}
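     /*
      * qemu_fopen_bdrv() is used by the savevm/loadvm snapshot code: the VM
      * state is written to (or read back from) the vmstate area of a
      * snapshot-capable block device such as qcow2, via the bdrv_*_vmstate()
      * helpers above.
      */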
 196
 197
 198/* QEMUFile timer support.
  199 * Not in qemu-file.c so as not to make qemu-timer.c a dependency of qemu-file.c
 200 */
 201
 202void timer_put(QEMUFile *f, QEMUTimer *ts)
 203{
 204    uint64_t expire_time;
 205
 206    expire_time = timer_expire_time_ns(ts);
 207    qemu_put_be64(f, expire_time);
 208}
 209
 210void timer_get(QEMUFile *f, QEMUTimer *ts)
 211{
 212    uint64_t expire_time;
 213
 214    expire_time = qemu_get_be64(f);
 215    if (expire_time != -1) {
 216        timer_mod_ns(ts, expire_time);
 217    } else {
 218        timer_del(ts);
 219    }
 220}
 221
 222
 223/* VMState timer support.
  224 * Not in vmstate.c so as not to make qemu-timer.c a dependency of vmstate.c
 225 */
 226
 227static int get_timer(QEMUFile *f, void *pv, size_t size)
 228{
 229    QEMUTimer *v = pv;
 230    timer_get(f, v);
 231    return 0;
 232}
 233
 234static void put_timer(QEMUFile *f, void *pv, size_t size)
 235{
 236    QEMUTimer *v = pv;
 237    timer_put(f, v);
 238}
 239
 240const VMStateInfo vmstate_info_timer = {
 241    .name = "timer",
 242    .get  = get_timer,
 243    .put  = put_timer,
 244};
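     /*
      * vmstate_info_timer backs the VMSTATE_TIMER*() field macros.  A device
      * description would typically contain something like (field and struct
      * names here are illustrative only):
      *
      *     VMSTATE_TIMER_PTR(ack_timer, MyDeviceState),
      *
      * which serializes the timer's absolute expiry time in ns as a be64,
      * or -1 when the timer is not pending.
      */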
 245
 246
 247typedef struct CompatEntry {
 248    char idstr[256];
 249    int instance_id;
 250} CompatEntry;
 251
 252typedef struct SaveStateEntry {
 253    QTAILQ_ENTRY(SaveStateEntry) entry;
 254    char idstr[256];
 255    int instance_id;
 256    int alias_id;
 257    int version_id;
 258    int section_id;
 259    SaveVMHandlers *ops;
 260    const VMStateDescription *vmsd;
 261    void *opaque;
 262    CompatEntry *compat;
 263    int is_ram;
 264} SaveStateEntry;
 265
 266typedef struct SaveState {
 267    QTAILQ_HEAD(, SaveStateEntry) handlers;
 268    int global_section_id;
 269    bool skip_configuration;
 270    uint32_t len;
 271    const char *name;
 272} SaveState;
 273
 274static SaveState savevm_state = {
 275    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
 276    .global_section_id = 0,
 277    .skip_configuration = false,
 278};
 279
 280void savevm_skip_configuration(void)
 281{
 282    savevm_state.skip_configuration = true;
 283}
 284
 285
 286static void configuration_pre_save(void *opaque)
 287{
 288    SaveState *state = opaque;
 289    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
 290
 291    state->len = strlen(current_name);
 292    state->name = current_name;
 293}
 294
 295static int configuration_post_load(void *opaque, int version_id)
 296{
 297    SaveState *state = opaque;
 298    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
 299
 300    if (strncmp(state->name, current_name, state->len) != 0) {
 301        error_report("Machine type received is '%.*s' and local is '%s'",
 302                     (int) state->len, state->name, current_name);
 303        return -EINVAL;
 304    }
 305    return 0;
 306}
 307
 308static const VMStateDescription vmstate_configuration = {
 309    .name = "configuration",
 310    .version_id = 1,
 311    .post_load = configuration_post_load,
 312    .pre_save = configuration_pre_save,
 313    .fields = (VMStateField[]) {
 314        VMSTATE_UINT32(len, SaveState),
 315        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len),
 316        VMSTATE_END_OF_LIST()
 317    },
 318};
 319
 320static void dump_vmstate_vmsd(FILE *out_file,
 321                              const VMStateDescription *vmsd, int indent,
 322                              bool is_subsection);
 323
 324static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
 325                              int indent)
 326{
 327    fprintf(out_file, "%*s{\n", indent, "");
 328    indent += 2;
 329    fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
 330    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
 331            field->version_id);
 332    fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
 333            field->field_exists ? "true" : "false");
 334    fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
 335    if (field->vmsd != NULL) {
 336        fprintf(out_file, ",\n");
 337        dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
 338    }
 339    fprintf(out_file, "\n%*s}", indent - 2, "");
 340}
 341
 342static void dump_vmstate_vmss(FILE *out_file,
 343                              const VMStateDescription **subsection,
 344                              int indent)
 345{
 346    if (*subsection != NULL) {
 347        dump_vmstate_vmsd(out_file, *subsection, indent, true);
 348    }
 349}
 350
 351static void dump_vmstate_vmsd(FILE *out_file,
 352                              const VMStateDescription *vmsd, int indent,
 353                              bool is_subsection)
 354{
 355    if (is_subsection) {
 356        fprintf(out_file, "%*s{\n", indent, "");
 357    } else {
 358        fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
 359    }
 360    indent += 2;
 361    fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
 362    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
 363            vmsd->version_id);
 364    fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
 365            vmsd->minimum_version_id);
 366    if (vmsd->fields != NULL) {
 367        const VMStateField *field = vmsd->fields;
 368        bool first;
 369
 370        fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
 371        first = true;
 372        while (field->name != NULL) {
 373            if (field->flags & VMS_MUST_EXIST) {
 374                /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
 375                field++;
 376                continue;
 377            }
 378            if (!first) {
 379                fprintf(out_file, ",\n");
 380            }
 381            dump_vmstate_vmsf(out_file, field, indent + 2);
 382            field++;
 383            first = false;
 384        }
 385        fprintf(out_file, "\n%*s]", indent, "");
 386    }
 387    if (vmsd->subsections != NULL) {
 388        const VMStateDescription **subsection = vmsd->subsections;
 389        bool first;
 390
 391        fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
 392        first = true;
 393        while (*subsection != NULL) {
 394            if (!first) {
 395                fprintf(out_file, ",\n");
 396            }
 397            dump_vmstate_vmss(out_file, subsection, indent + 2);
 398            subsection++;
 399            first = false;
 400        }
 401        fprintf(out_file, "\n%*s]", indent, "");
 402    }
 403    fprintf(out_file, "\n%*s}", indent - 2, "");
 404}
 405
 406static void dump_machine_type(FILE *out_file)
 407{
 408    MachineClass *mc;
 409
 410    mc = MACHINE_GET_CLASS(current_machine);
 411
 412    fprintf(out_file, "  \"vmschkmachine\": {\n");
 413    fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
 414    fprintf(out_file, "  },\n");
 415}
 416
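     /*
      * Entry point for the -dump-vmstate command line option: emits a JSON
      * description of every device's migration format.  Dumps from two QEMU
      * versions can be compared (e.g. with scripts/vmstate-static-checker.py)
      * to spot changes that would break the migration stream.
      */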
 417void dump_vmstate_json_to_file(FILE *out_file)
 418{
 419    GSList *list, *elt;
 420    bool first;
 421
 422    fprintf(out_file, "{\n");
 423    dump_machine_type(out_file);
 424
 425    first = true;
 426    list = object_class_get_list(TYPE_DEVICE, true);
 427    for (elt = list; elt; elt = elt->next) {
 428        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
 429                                             TYPE_DEVICE);
 430        const char *name;
 431        int indent = 2;
 432
 433        if (!dc->vmsd) {
 434            continue;
 435        }
 436
 437        if (!first) {
 438            fprintf(out_file, ",\n");
 439        }
 440        name = object_class_get_name(OBJECT_CLASS(dc));
 441        fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
 442        indent += 2;
 443        fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
 444        fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
 445                dc->vmsd->version_id);
 446        fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
 447                dc->vmsd->minimum_version_id);
 448
 449        dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);
 450
 451        fprintf(out_file, "\n%*s}", indent - 2, "");
 452        first = false;
 453    }
 454    fprintf(out_file, "\n}\n");
 455    fclose(out_file);
 456}
 457
 458static int calculate_new_instance_id(const char *idstr)
 459{
 460    SaveStateEntry *se;
 461    int instance_id = 0;
 462
 463    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 464        if (strcmp(idstr, se->idstr) == 0
 465            && instance_id <= se->instance_id) {
 466            instance_id = se->instance_id + 1;
 467        }
 468    }
 469    return instance_id;
 470}
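     /*
      * For example, registering three handlers that share an idstr, each with
      * instance_id == -1, assigns them instance ids 0, 1 and 2 in
      * registration order.
      */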
 471
 472static int calculate_compat_instance_id(const char *idstr)
 473{
 474    SaveStateEntry *se;
 475    int instance_id = 0;
 476
 477    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 478        if (!se->compat) {
 479            continue;
 480        }
 481
 482        if (strcmp(idstr, se->compat->idstr) == 0
 483            && instance_id <= se->compat->instance_id) {
 484            instance_id = se->compat->instance_id + 1;
 485        }
 486    }
 487    return instance_id;
 488}
 489
 490/* TODO: Individual devices generally have very little idea about the rest
 491   of the system, so instance_id should be removed/replaced.
 492   Meanwhile pass -1 as instance_id if you do not already have a clearly
 493   distinguishing id for all instances of your device class. */
 494int register_savevm_live(DeviceState *dev,
 495                         const char *idstr,
 496                         int instance_id,
 497                         int version_id,
 498                         SaveVMHandlers *ops,
 499                         void *opaque)
 500{
 501    SaveStateEntry *se;
 502
 503    se = g_new0(SaveStateEntry, 1);
 504    se->version_id = version_id;
 505    se->section_id = savevm_state.global_section_id++;
 506    se->ops = ops;
 507    se->opaque = opaque;
 508    se->vmsd = NULL;
  509    /* if this is a live savevm handler then set is_ram */
 510    if (ops->save_live_setup != NULL) {
 511        se->is_ram = 1;
 512    }
 513
 514    if (dev) {
 515        char *id = qdev_get_dev_path(dev);
 516        if (id) {
 517            pstrcpy(se->idstr, sizeof(se->idstr), id);
 518            pstrcat(se->idstr, sizeof(se->idstr), "/");
 519            g_free(id);
 520
 521            se->compat = g_new0(CompatEntry, 1);
 522            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr);
 523            se->compat->instance_id = instance_id == -1 ?
 524                         calculate_compat_instance_id(idstr) : instance_id;
 525            instance_id = -1;
 526        }
 527    }
 528    pstrcat(se->idstr, sizeof(se->idstr), idstr);
 529
 530    if (instance_id == -1) {
 531        se->instance_id = calculate_new_instance_id(se->idstr);
 532    } else {
 533        se->instance_id = instance_id;
 534    }
 535    assert(!se->compat || se->instance_id == 0);
 536    /* add at the end of list */
 537    QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
 538    return 0;
 539}
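     /*
      * A minimal registration sketch (handler and state names are purely
      * illustrative):
      *
      *     static SaveVMHandlers savevm_foo_handlers = {
      *         .save_live_setup            = foo_save_setup,
      *         .save_live_iterate          = foo_save_iterate,
      *         .save_live_complete_precopy = foo_save_complete,
      *         .save_live_pending          = foo_save_pending,
      *         .load_state                 = foo_load,
      *     };
      *
      *     register_savevm_live(NULL, "foo", 0, 1, &savevm_foo_handlers, &foo_state);
      */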
 540
 541int register_savevm(DeviceState *dev,
 542                    const char *idstr,
 543                    int instance_id,
 544                    int version_id,
 545                    SaveStateHandler *save_state,
 546                    LoadStateHandler *load_state,
 547                    void *opaque)
 548{
 549    SaveVMHandlers *ops = g_new0(SaveVMHandlers, 1);
 550    ops->save_state = save_state;
 551    ops->load_state = load_state;
 552    return register_savevm_live(dev, idstr, instance_id, version_id,
 553                                ops, opaque);
 554}
 555
 556void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
 557{
 558    SaveStateEntry *se, *new_se;
 559    char id[256] = "";
 560
 561    if (dev) {
 562        char *path = qdev_get_dev_path(dev);
 563        if (path) {
 564            pstrcpy(id, sizeof(id), path);
 565            pstrcat(id, sizeof(id), "/");
 566            g_free(path);
 567        }
 568    }
 569    pstrcat(id, sizeof(id), idstr);
 570
 571    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
 572        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
 573            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
 574            g_free(se->compat);
 575            g_free(se->ops);
 576            g_free(se);
 577        }
 578    }
 579}
 580
 581int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
 582                                   const VMStateDescription *vmsd,
 583                                   void *opaque, int alias_id,
 584                                   int required_for_version)
 585{
 586    SaveStateEntry *se;
 587
 588    /* If this triggers, alias support can be dropped for the vmsd. */
 589    assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
 590
 591    se = g_new0(SaveStateEntry, 1);
 592    se->version_id = vmsd->version_id;
 593    se->section_id = savevm_state.global_section_id++;
 594    se->opaque = opaque;
 595    se->vmsd = vmsd;
 596    se->alias_id = alias_id;
 597
 598    if (dev) {
 599        char *id = qdev_get_dev_path(dev);
 600        if (id) {
 601            pstrcpy(se->idstr, sizeof(se->idstr), id);
 602            pstrcat(se->idstr, sizeof(se->idstr), "/");
 603            g_free(id);
 604
 605            se->compat = g_new0(CompatEntry, 1);
 606            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
 607            se->compat->instance_id = instance_id == -1 ?
 608                         calculate_compat_instance_id(vmsd->name) : instance_id;
 609            instance_id = -1;
 610        }
 611    }
 612    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
 613
 614    if (instance_id == -1) {
 615        se->instance_id = calculate_new_instance_id(se->idstr);
 616    } else {
 617        se->instance_id = instance_id;
 618    }
 619    assert(!se->compat || se->instance_id == 0);
 620    /* add at the end of list */
 621    QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
 622    return 0;
 623}
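     /*
      * The usual vmstate_register() helper is a thin wrapper around this
      * function, passing alias_id == -1 and required_for_version == 0.
      */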
 624
 625void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
 626                        void *opaque)
 627{
 628    SaveStateEntry *se, *new_se;
 629
 630    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
 631        if (se->vmsd == vmsd && se->opaque == opaque) {
 632            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
 633            g_free(se->compat);
 634            g_free(se);
 635        }
 636    }
 637}
 638
 639static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id)
 640{
 641    trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
 642    if (!se->vmsd) {         /* Old style */
 643        return se->ops->load_state(f, se->opaque, version_id);
 644    }
 645    return vmstate_load_state(f, se->vmsd, se->opaque, version_id);
 646}
 647
 648static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
 649{
 650    int64_t old_offset, size;
 651
 652    old_offset = qemu_ftell_fast(f);
 653    se->ops->save_state(f, se->opaque);
 654    size = qemu_ftell_fast(f) - old_offset;
 655
 656    if (vmdesc) {
 657        json_prop_int(vmdesc, "size", size);
 658        json_start_array(vmdesc, "fields");
 659        json_start_object(vmdesc, NULL);
 660        json_prop_str(vmdesc, "name", "data");
 661        json_prop_int(vmdesc, "size", size);
 662        json_prop_str(vmdesc, "type", "buffer");
 663        json_end_object(vmdesc);
 664        json_end_array(vmdesc);
 665    }
 666}
 667
 668static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
 669{
 670    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
 671    if (!se->vmsd) {
 672        vmstate_save_old_style(f, se, vmdesc);
 673        return;
 674    }
 675    vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 676}
 677
 678void savevm_skip_section_footers(void)
 679{
 680    skip_section_footers = true;
 681}
 682
 683/*
  684 * Write the header for a device section (QEMU_VM_SECTION_{START,END,PART,FULL})
 685 */
 686static void save_section_header(QEMUFile *f, SaveStateEntry *se,
 687                                uint8_t section_type)
 688{
 689    qemu_put_byte(f, section_type);
 690    qemu_put_be32(f, se->section_id);
 691
 692    if (section_type == QEMU_VM_SECTION_FULL ||
 693        section_type == QEMU_VM_SECTION_START) {
 694        /* ID string */
 695        size_t len = strlen(se->idstr);
 696        qemu_put_byte(f, len);
 697        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
 698
 699        qemu_put_be32(f, se->instance_id);
 700        qemu_put_be32(f, se->version_id);
 701    }
 702}
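     /*
      * The resulting wire layout is:
      *
      *     byte      section type (QEMU_VM_SECTION_{START,PART,END,FULL})
      *     be32      section id
      *  and, for START and FULL sections only:
      *     byte      length of the id string
      *   n x byte    id string (not 0 terminated)
      *     be32      instance id
      *     be32      version id
      */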
 703
 704/*
  705 * Write a footer onto device sections that catches cases of misformatted
  706 * device sections.
 707 */
 708static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
 709{
 710    if (!skip_section_footers) {
 711        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
 712        qemu_put_be32(f, se->section_id);
 713    }
 714}
 715
 716/**
 717 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
 718 *                           command and associated data.
 719 *
 720 * @f: File to send command on
 721 * @command: Command type to send
 722 * @len: Length of associated data
 723 * @data: Data associated with command.
 724 */
 725void qemu_savevm_command_send(QEMUFile *f,
 726                              enum qemu_vm_cmd command,
 727                              uint16_t len,
 728                              uint8_t *data)
 729{
 730    trace_savevm_command_send(command, len);
 731    qemu_put_byte(f, QEMU_VM_COMMAND);
 732    qemu_put_be16(f, (uint16_t)command);
 733    qemu_put_be16(f, len);
 734    qemu_put_buffer(f, data, len);
 735    qemu_fflush(f);
 736}
 737
 738void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
 739{
 740    uint32_t buf;
 741
 742    trace_savevm_send_ping(value);
 743    buf = cpu_to_be32(value);
 744    qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
 745}
 746
 747void qemu_savevm_send_open_return_path(QEMUFile *f)
 748{
 749    trace_savevm_send_open_return_path();
 750    qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
 751}
 752
  753/* We have a buffer of data to send; we don't want all of that to be loaded
 754 * by the command itself, so the command contains just the length of the
 755 * extra buffer that we then send straight after it.
  756 * TODO: There must be a better way to organise this.
 757 *
 758 * Returns:
 759 *    0 on success
 760 *    -ve on error
 761 */
 762int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb)
 763{
 764    size_t cur_iov;
 765    size_t len = qsb_get_length(qsb);
 766    uint32_t tmp;
 767
 768    if (len > MAX_VM_CMD_PACKAGED_SIZE) {
 769        error_report("%s: Unreasonably large packaged state: %zu",
 770                     __func__, len);
 771        return -1;
 772    }
 773
 774    tmp = cpu_to_be32(len);
 775
 776    trace_qemu_savevm_send_packaged();
 777    qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
 778
  779    /* all the data follows (concatenating the iovs) */
 780    for (cur_iov = 0; cur_iov < qsb->n_iov; cur_iov++) {
 781        /* The iov entries are partially filled */
 782        size_t towrite = MIN(qsb->iov[cur_iov].iov_len, len);
 783        len -= towrite;
 784
 785        if (!towrite) {
 786            break;
 787        }
 788
 789        qemu_put_buffer(f, qsb->iov[cur_iov].iov_base, towrite);
 790    }
 791
 792    return 0;
 793}
 794
 795/* Send prior to any postcopy transfer */
 796void qemu_savevm_send_postcopy_advise(QEMUFile *f)
 797{
 798    uint64_t tmp[2];
 799    tmp[0] = cpu_to_be64(getpagesize());
 800    tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
 801
 802    trace_qemu_savevm_send_postcopy_advise();
 803    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
 804}
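     /* The 16 byte payload is two be64s: the host page size followed by the
      * target page size; loadvm_postcopy_handle_advise() on the destination
      * checks that both match its own values. */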
 805
  806/* Sent prior to starting the destination running in postcopy; tells it to
  807 * discard pages that have already been sent but then redirtied on the source.
  808 * CMD_POSTCOPY_RAM_DISCARD consists of:
 809 *      byte   version (0)
 810 *      byte   Length of name field (not including 0)
 811 *  n x byte   RAM block name
 812 *      byte   0 terminator (just for safety)
 813 *  n x        Byte ranges within the named RAMBlock
 814 *      be64   Start of the range
 815 *      be64   Length
 816 *
 817 *  name:  RAMBlock name that these entries are part of
  818 *  len: Number of (start, length) range entries
  819 *  start_list: 'len' start addresses
  820 *  length_list: 'len' range lengths
 821 *
 822 */
 823void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
 824                                           uint16_t len,
 825                                           uint64_t *start_list,
 826                                           uint64_t *length_list)
 827{
 828    uint8_t *buf;
 829    uint16_t tmplen;
 830    uint16_t t;
 831    size_t name_len = strlen(name);
 832
 833    trace_qemu_savevm_send_postcopy_ram_discard(name, len);
 834    assert(name_len < 256);
 835    buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
 836    buf[0] = postcopy_ram_discard_version;
 837    buf[1] = name_len;
 838    memcpy(buf + 2, name, name_len);
 839    tmplen = 2 + name_len;
 840    buf[tmplen++] = '\0';
 841
 842    for (t = 0; t < len; t++) {
 843        cpu_to_be64w((uint64_t *)(buf + tmplen), start_list[t]);
 844        tmplen += 8;
 845        cpu_to_be64w((uint64_t *)(buf + tmplen), length_list[t]);
 846        tmplen += 8;
 847    }
 848    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
 849    g_free(buf);
 850}
 851
 852/* Get the destination into a state where it can receive postcopy data. */
 853void qemu_savevm_send_postcopy_listen(QEMUFile *f)
 854{
 855    trace_savevm_send_postcopy_listen();
 856    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
 857}
 858
 859/* Kick the destination into running */
 860void qemu_savevm_send_postcopy_run(QEMUFile *f)
 861{
 862    trace_savevm_send_postcopy_run();
 863    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
 864}
 865
 866bool qemu_savevm_state_blocked(Error **errp)
 867{
 868    SaveStateEntry *se;
 869
 870    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 871        if (se->vmsd && se->vmsd->unmigratable) {
 872            error_setg(errp, "State blocked by non-migratable device '%s'",
 873                       se->idstr);
 874            return true;
 875        }
 876    }
 877    return false;
 878}
 879
 880static bool enforce_config_section(void)
 881{
 882    MachineState *machine = MACHINE(qdev_get_machine());
 883    return machine->enforce_config_section;
 884}
 885
 886void qemu_savevm_state_header(QEMUFile *f)
 887{
 888    trace_savevm_state_header();
 889    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
 890    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
 891
 892    if (!savevm_state.skip_configuration || enforce_config_section()) {
 893        qemu_put_byte(f, QEMU_VM_CONFIGURATION);
 894        vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
 895    }
 896
 897}
 898
 899void qemu_savevm_state_begin(QEMUFile *f,
 900                             const MigrationParams *params)
 901{
 902    SaveStateEntry *se;
 903    int ret;
 904
 905    trace_savevm_state_begin();
 906    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 907        if (!se->ops || !se->ops->set_params) {
 908            continue;
 909        }
 910        se->ops->set_params(params, se->opaque);
 911    }
 912
 913    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 914        if (!se->ops || !se->ops->save_live_setup) {
 915            continue;
 916        }
 917        if (se->ops && se->ops->is_active) {
 918            if (!se->ops->is_active(se->opaque)) {
 919                continue;
 920            }
 921        }
 922        save_section_header(f, se, QEMU_VM_SECTION_START);
 923
 924        ret = se->ops->save_live_setup(f, se->opaque);
 925        save_section_footer(f, se);
 926        if (ret < 0) {
 927            qemu_file_set_error(f, ret);
 928            break;
 929        }
 930    }
 931}
 932
 933/*
  934 * This function has three return values:
  935 *   negative: there was an error, and we have -errno.
  936 *   0 : We haven't finished, the caller has to go again
  937 *   1 : We have finished, we can go to the complete phase
 938 */
 939int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
 940{
 941    SaveStateEntry *se;
 942    int ret = 1;
 943
 944    trace_savevm_state_iterate();
 945    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 946        if (!se->ops || !se->ops->save_live_iterate) {
 947            continue;
 948        }
 949        if (se->ops && se->ops->is_active) {
 950            if (!se->ops->is_active(se->opaque)) {
 951                continue;
 952            }
 953        }
 954        /*
 955         * In the postcopy phase, any device that doesn't know how to
  956         * do postcopy should have saved its state in the _complete
  957         * call that's already run; it might get confused if we call
 958         * iterate afterwards.
 959         */
 960        if (postcopy && !se->ops->save_live_complete_postcopy) {
 961            continue;
 962        }
 963        if (qemu_file_rate_limit(f)) {
 964            return 0;
 965        }
 966        trace_savevm_section_start(se->idstr, se->section_id);
 967
 968        save_section_header(f, se, QEMU_VM_SECTION_PART);
 969
 970        ret = se->ops->save_live_iterate(f, se->opaque);
 971        trace_savevm_section_end(se->idstr, se->section_id, ret);
 972        save_section_footer(f, se);
 973
 974        if (ret < 0) {
 975            qemu_file_set_error(f, ret);
 976        }
 977        if (ret <= 0) {
  978            /* Do not proceed to the next vmstate before this one has
  979               reported completion of the current stage.  This serializes
  980               the migration and reduces the probability that a
  981               faster-changing state is synchronized over and over again. */
 982            break;
 983        }
 984    }
 985    return ret;
 986}
 987
 988static bool should_send_vmdesc(void)
 989{
 990    MachineState *machine = MACHINE(qdev_get_machine());
 991    bool in_postcopy = migration_in_postcopy(migrate_get_current());
 992    return !machine->suppress_vmdesc && !in_postcopy;
 993}
 994
 995/*
 996 * Calls the save_live_complete_postcopy methods
 997 * causing the last few pages to be sent immediately and doing any associated
 998 * cleanup.
  999 * Note that postcopy also calls qemu_savevm_state_complete_precopy to complete
1000 * all the other devices, but that happens at the point we switch to postcopy.
1001 */
1002void qemu_savevm_state_complete_postcopy(QEMUFile *f)
1003{
1004    SaveStateEntry *se;
1005    int ret;
1006
1007    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1008        if (!se->ops || !se->ops->save_live_complete_postcopy) {
1009            continue;
1010        }
1011        if (se->ops && se->ops->is_active) {
1012            if (!se->ops->is_active(se->opaque)) {
1013                continue;
1014            }
1015        }
1016        trace_savevm_section_start(se->idstr, se->section_id);
1017        /* Section type */
1018        qemu_put_byte(f, QEMU_VM_SECTION_END);
1019        qemu_put_be32(f, se->section_id);
1020
1021        ret = se->ops->save_live_complete_postcopy(f, se->opaque);
1022        trace_savevm_section_end(se->idstr, se->section_id, ret);
1023        save_section_footer(f, se);
1024        if (ret < 0) {
1025            qemu_file_set_error(f, ret);
1026            return;
1027        }
1028    }
1029
1030    qemu_put_byte(f, QEMU_VM_EOF);
1031    qemu_fflush(f);
1032}
1033
1034void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
1035{
1036    QJSON *vmdesc;
1037    int vmdesc_len;
1038    SaveStateEntry *se;
1039    int ret;
1040    bool in_postcopy = migration_in_postcopy(migrate_get_current());
1041
1042    trace_savevm_state_complete_precopy();
1043
1044    cpu_synchronize_all_states();
1045
1046    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1047        if (!se->ops ||
1048            (in_postcopy && se->ops->save_live_complete_postcopy) ||
1049            (in_postcopy && !iterable_only) ||
1050            !se->ops->save_live_complete_precopy) {
1051            continue;
1052        }
1053
1054        if (se->ops && se->ops->is_active) {
1055            if (!se->ops->is_active(se->opaque)) {
1056                continue;
1057            }
1058        }
1059        trace_savevm_section_start(se->idstr, se->section_id);
1060
1061        save_section_header(f, se, QEMU_VM_SECTION_END);
1062
1063        ret = se->ops->save_live_complete_precopy(f, se->opaque);
1064        trace_savevm_section_end(se->idstr, se->section_id, ret);
1065        save_section_footer(f, se);
1066        if (ret < 0) {
1067            qemu_file_set_error(f, ret);
1068            return;
1069        }
1070    }
1071
1072    if (iterable_only) {
1073        return;
1074    }
1075
1076    vmdesc = qjson_new();
1077    json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE);
1078    json_start_array(vmdesc, "devices");
1079    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1080
1081        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1082            continue;
1083        }
1084        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1085            trace_savevm_section_skip(se->idstr, se->section_id);
1086            continue;
1087        }
1088
1089        trace_savevm_section_start(se->idstr, se->section_id);
1090
1091        json_start_object(vmdesc, NULL);
1092        json_prop_str(vmdesc, "name", se->idstr);
1093        json_prop_int(vmdesc, "instance_id", se->instance_id);
1094
1095        save_section_header(f, se, QEMU_VM_SECTION_FULL);
1096        vmstate_save(f, se, vmdesc);
1097        trace_savevm_section_end(se->idstr, se->section_id, 0);
1098        save_section_footer(f, se);
1099
1100        json_end_object(vmdesc);
1101    }
1102
1103    if (!in_postcopy) {
1104        /* Postcopy stream will still be going */
1105        qemu_put_byte(f, QEMU_VM_EOF);
1106    }
1107
1108    json_end_array(vmdesc);
1109    qjson_finish(vmdesc);
1110    vmdesc_len = strlen(qjson_get_str(vmdesc));
1111
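         /*
          * At this point vmdesc holds a JSON description of the stream, roughly
          * (abridged, device names illustrative):
          *
          *   { "page_size": 4096,
          *     "devices": [
          *       { "name": "timer", "instance_id": 0, ... },
          *       { "name": "0000:00:02.0/virtio-net", "instance_id": 0, ... } ] }
          *
          * with the per-device details filled in by vmstate_save().  Tools such
          * as scripts/analyze-migration.py use it to decode the device state
          * without needing the source code.
          */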
1112    if (should_send_vmdesc()) {
1113        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
1114        qemu_put_be32(f, vmdesc_len);
1115        qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
1116    }
1117    object_unref(OBJECT(vmdesc));
1118
1119    qemu_fflush(f);
1120}
1121
 1122/* Give an estimate of the amount left to be transferred;
 1123 * the result is split into the amount for units that can
 1124 * and for units that can't do postcopy.
1125 */
1126void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
1127                               uint64_t *res_non_postcopiable,
1128                               uint64_t *res_postcopiable)
1129{
1130    SaveStateEntry *se;
1131
1132    *res_non_postcopiable = 0;
1133    *res_postcopiable = 0;
1134
1136    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1137        if (!se->ops || !se->ops->save_live_pending) {
1138            continue;
1139        }
1140        if (se->ops && se->ops->is_active) {
1141            if (!se->ops->is_active(se->opaque)) {
1142                continue;
1143            }
1144        }
1145        se->ops->save_live_pending(f, se->opaque, max_size,
1146                                   res_non_postcopiable, res_postcopiable);
1147    }
1148}
1149
1150void qemu_savevm_state_cleanup(void)
1151{
1152    SaveStateEntry *se;
1153
1154    trace_savevm_state_cleanup();
1155    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1156        if (se->ops && se->ops->cleanup) {
1157            se->ops->cleanup(se->opaque);
1158        }
1159    }
1160}
1161
1162static int qemu_savevm_state(QEMUFile *f, Error **errp)
1163{
1164    int ret;
1165    MigrationParams params = {
1166        .blk = 0,
1167        .shared = 0
1168    };
1169    MigrationState *ms = migrate_init(&params);
1170    ms->to_dst_file = f;
1171
1172    if (qemu_savevm_state_blocked(errp)) {
1173        return -EINVAL;
1174    }
1175
1176    qemu_mutex_unlock_iothread();
1177    qemu_savevm_state_header(f);
1178    qemu_savevm_state_begin(f, &params);
1179    qemu_mutex_lock_iothread();
1180
1181    while (qemu_file_get_error(f) == 0) {
1182        if (qemu_savevm_state_iterate(f, false) > 0) {
1183            break;
1184        }
1185    }
1186
1187    ret = qemu_file_get_error(f);
1188    if (ret == 0) {
1189        qemu_savevm_state_complete_precopy(f, false);
1190        ret = qemu_file_get_error(f);
1191    }
1192    qemu_savevm_state_cleanup();
1193    if (ret != 0) {
1194        error_setg_errno(errp, -ret, "Error while writing VM state");
1195    }
1196    return ret;
1197}
1198
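     /*
      * Write only the device state: RAM sections (se->is_ram) are skipped and
      * no vmdesc is appended.  This backs the "xen-save-devices-state" QMP
      * command, where Xen itself is responsible for transferring guest RAM.
      */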
1199static int qemu_save_device_state(QEMUFile *f)
1200{
1201    SaveStateEntry *se;
1202
1203    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1204    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1205
1206    cpu_synchronize_all_states();
1207
1208    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1209        if (se->is_ram) {
1210            continue;
1211        }
1212        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1213            continue;
1214        }
1215        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1216            continue;
1217        }
1218
1219        save_section_header(f, se, QEMU_VM_SECTION_FULL);
1220
1221        vmstate_save(f, se, NULL);
1222
1223        save_section_footer(f, se);
1224    }
1225
1226    qemu_put_byte(f, QEMU_VM_EOF);
1227
1228    return qemu_file_get_error(f);
1229}
1230
1231static SaveStateEntry *find_se(const char *idstr, int instance_id)
1232{
1233    SaveStateEntry *se;
1234
1235    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1236        if (!strcmp(se->idstr, idstr) &&
1237            (instance_id == se->instance_id ||
1238             instance_id == se->alias_id))
1239            return se;
1240        /* Migrating from an older version? */
1241        if (strstr(se->idstr, idstr) && se->compat) {
1242            if (!strcmp(se->compat->idstr, idstr) &&
1243                (instance_id == se->compat->instance_id ||
1244                 instance_id == se->alias_id))
1245                return se;
1246        }
1247    }
1248    return NULL;
1249}
1250
1251enum LoadVMExitCodes {
1252    /* Allow a command to quit all layers of nested loadvm loops */
1253    LOADVM_QUIT     =  1,
1254};
1255
1256static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
1257
1258/* ------ incoming postcopy messages ------ */
1259/* 'advise' arrives before any transfers just to tell us that a postcopy
1260 * *might* happen - it might be skipped if precopy transferred everything
1261 * quickly.
1262 */
1263static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
1264{
1265    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1266    uint64_t remote_hps, remote_tps;
1267
1268    trace_loadvm_postcopy_handle_advise();
1269    if (ps != POSTCOPY_INCOMING_NONE) {
1270        error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
1271        return -1;
1272    }
1273
1274    if (!postcopy_ram_supported_by_host()) {
1275        return -1;
1276    }
1277
1278    remote_hps = qemu_get_be64(mis->from_src_file);
1279    if (remote_hps != getpagesize())  {
1280        /*
1281         * Some combinations of mismatch are probably possible but it gets
1282         * a bit more complicated.  In particular we need to place whole
1283         * host pages on the dest at once, and we need to ensure that we
1284         * handle dirtying to make sure we never end up sending part of
 1285         * a hostpage on its own.
1286         */
1287        error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
1288                     (int)remote_hps, getpagesize());
1289        return -1;
1290    }
1291
1292    remote_tps = qemu_get_be64(mis->from_src_file);
1293    if (remote_tps != (1ul << qemu_target_page_bits())) {
1294        /*
1295         * Again, some differences could be dealt with, but for now keep it
1296         * simple.
1297         */
1298        error_report("Postcopy needs matching target page sizes (s=%d d=%d)",
1299                     (int)remote_tps, 1 << qemu_target_page_bits());
1300        return -1;
1301    }
1302
1303    if (ram_postcopy_incoming_init(mis)) {
1304        return -1;
1305    }
1306
1307    postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1308
1309    return 0;
1310}
1311
1312/* After postcopy we will be told to throw some pages away since they're
 1313 * dirty and will have to be demand-fetched.  This must happen before the
 1314 * CPU is started.
1315 * There can be 0..many of these messages, each encoding multiple pages.
1316 */
1317static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
1318                                              uint16_t len)
1319{
1320    int tmp;
1321    char ramid[256];
1322    PostcopyState ps = postcopy_state_get();
1323
1324    trace_loadvm_postcopy_ram_handle_discard();
1325
1326    switch (ps) {
1327    case POSTCOPY_INCOMING_ADVISE:
1328        /* 1st discard */
1329        tmp = postcopy_ram_prepare_discard(mis);
1330        if (tmp) {
1331            return tmp;
1332        }
1333        break;
1334
1335    case POSTCOPY_INCOMING_DISCARD:
1336        /* Expected state */
1337        break;
1338
1339    default:
1340        error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
1341                     ps);
1342        return -1;
1343    }
 1344    /* We're expecting a
 1345     *    version byte (0)
 1346     *    a RAM ID string (length byte, name, 0 terminator)
 1347     *    then at least one 16 byte chunk
 1348     */
1349    if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
1350        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1351        return -1;
1352    }
1353
1354    tmp = qemu_get_byte(mis->from_src_file);
1355    if (tmp != postcopy_ram_discard_version) {
1356        error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
1357        return -1;
1358    }
1359
1360    if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
1361        error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
1362        return -1;
1363    }
1364    tmp = qemu_get_byte(mis->from_src_file);
1365    if (tmp != 0) {
1366        error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
1367        return -1;
1368    }
1369
1370    len -= 3 + strlen(ramid);
1371    if (len % 16) {
1372        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1373        return -1;
1374    }
1375    trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
1376    while (len) {
1377        uint64_t start_addr, block_length;
1378        start_addr = qemu_get_be64(mis->from_src_file);
1379        block_length = qemu_get_be64(mis->from_src_file);
1380
1381        len -= 16;
1382        int ret = ram_discard_range(mis, ramid, start_addr,
1383                                    block_length);
1384        if (ret) {
1385            return ret;
1386        }
1387    }
1388    trace_loadvm_postcopy_ram_handle_discard_end();
1389
1390    return 0;
1391}
1392
1393/*
1394 * Triggered by a postcopy_listen command; this thread takes over reading
1395 * the input stream, leaving the main thread free to carry on loading the rest
1396 * of the device state (from RAM).
 1397 * (TODO: This could do with being in a postcopy file - but then again it's
1398 * just another input loop, not that postcopy specific)
1399 */
1400static void *postcopy_ram_listen_thread(void *opaque)
1401{
1402    QEMUFile *f = opaque;
1403    MigrationIncomingState *mis = migration_incoming_get_current();
1404    int load_res;
1405
1406    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
1407                                   MIGRATION_STATUS_POSTCOPY_ACTIVE);
1408    qemu_sem_post(&mis->listen_thread_sem);
1409    trace_postcopy_ram_listen_thread_start();
1410
1411    /*
1412     * Because we're a thread and not a coroutine we can't yield
1413     * in qemu_file, and thus we must be blocking now.
1414     */
1415    qemu_file_set_blocking(f, true);
1416    load_res = qemu_loadvm_state_main(f, mis);
1417    /* And non-blocking again so we don't block in any cleanup */
1418    qemu_file_set_blocking(f, false);
1419
1420    trace_postcopy_ram_listen_thread_exit();
1421    if (load_res < 0) {
1422        error_report("%s: loadvm failed: %d", __func__, load_res);
1423        qemu_file_set_error(f, load_res);
1424        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1425                                       MIGRATION_STATUS_FAILED);
1426    } else {
1427        /*
1428         * This looks good, but it's possible that the device loading in the
1429         * main thread hasn't finished yet, and so we might not be in 'RUN'
1430         * state yet; wait for the end of the main thread.
1431         */
1432        qemu_event_wait(&mis->main_thread_load_event);
1433    }
1434    postcopy_ram_incoming_cleanup(mis);
1435
1436    if (load_res < 0) {
1437        /*
1438         * If something went wrong then we have a bad state so exit;
1439         * depending how far we got it might be possible at this point
1440         * to leave the guest running and fire MCEs for pages that never
1441         * arrived as a desperate recovery step.
1442         */
1443        exit(EXIT_FAILURE);
1444    }
1445
1446    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1447                                   MIGRATION_STATUS_COMPLETED);
1448    /*
1449     * If everything has worked fine, then the main thread has waited
1450     * for us to start, and we're the last use of the mis.
1451     * (If something broke then qemu will have to exit anyway since it's
1452     * got a bad migration state).
1453     */
1454    migration_incoming_state_destroy();
1455
1457    return NULL;
1458}
1459
1460/* After this message we must be able to immediately receive postcopy data */
1461static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
1462{
1463    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
1464    trace_loadvm_postcopy_handle_listen();
1465    if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
1466        error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
1467        return -1;
1468    }
1469    if (ps == POSTCOPY_INCOMING_ADVISE) {
1470        /*
 1471         * A rare case: we entered listen without having to do any discards,
1472         * so do the setup that's normally done at the time of the 1st discard.
1473         */
1474        postcopy_ram_prepare_discard(mis);
1475    }
1476
1477    /*
 1478     * Sensitise RAM - it can now generate requests for blocks that don't
 1479     * exist.  However, at this point the CPU shouldn't be running, and the
 1480     * IO shouldn't be doing anything yet, so we don't actually expect requests.
1481     */
1482    if (postcopy_ram_enable_notify(mis)) {
1483        return -1;
1484    }
1485
1486    if (mis->have_listen_thread) {
1487        error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
1488        return -1;
1489    }
1490
1491    mis->have_listen_thread = true;
1492    /* Start up the listening thread and wait for it to signal ready */
1493    qemu_sem_init(&mis->listen_thread_sem, 0);
1494    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
1495                       postcopy_ram_listen_thread, mis->from_src_file,
1496                       QEMU_THREAD_DETACHED);
1497    qemu_sem_wait(&mis->listen_thread_sem);
1498    qemu_sem_destroy(&mis->listen_thread_sem);
1499
1500    return 0;
1501}
1502
1503
1504typedef struct {
1505    QEMUBH *bh;
1506} HandleRunBhData;
1507
1508static void loadvm_postcopy_handle_run_bh(void *opaque)
1509{
1510    Error *local_err = NULL;
1511    HandleRunBhData *data = opaque;
1512
 1513    /* TODO we should move all of this lot into postcopy_ram.c or shared code
1514     * in migration.c
1515     */
1516    cpu_synchronize_all_post_init();
1517
1518    qemu_announce_self();
1519
1520    /* Make sure all file formats flush their mutable metadata */
1521    bdrv_invalidate_cache_all(&local_err);
1522    if (local_err) {
1523        error_report_err(local_err);
1524    }
1525
1526    trace_loadvm_postcopy_handle_run_cpu_sync();
1527    cpu_synchronize_all_post_init();
1528
1529    trace_loadvm_postcopy_handle_run_vmstart();
1530
1531    if (autostart) {
1532        /* Hold onto your hats, starting the CPU */
1533        vm_start();
1534    } else {
1535        /* leave it paused and let management decide when to start the CPU */
1536        runstate_set(RUN_STATE_PAUSED);
1537    }
1538
1539    qemu_bh_delete(data->bh);
1540    g_free(data);
1541}
1542
1543/* After all discards we can start running and asking for pages */
1544static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
1545{
1546    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
1547    HandleRunBhData *data;
1548
1549    trace_loadvm_postcopy_handle_run();
1550    if (ps != POSTCOPY_INCOMING_LISTENING) {
1551        error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
1552        return -1;
1553    }
1554
1555    data = g_new(HandleRunBhData, 1);
1556    data->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, data);
1557    qemu_bh_schedule(data->bh);
1558
1559    /* We need to finish reading the stream from the package
1560     * and also stop reading anything more from the stream that loaded the
1561     * package (since it's now being read by the listener thread).
1562     * LOADVM_QUIT will quit all the layers of nested loadvm loops.
1563     */
1564    return LOADVM_QUIT;
1565}
1566
1567/**
1568 * Immediately following this command is a blob of data containing an embedded
1569 * chunk of migration stream; read it and load it.
1570 *
1571 * @mis: Incoming state
 1572 * The length of the packaged data is read from the stream itself.
1573 *
1574 * Returns: Negative values on error
1575 *
1576 */
1577static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
1578{
1579    int ret;
1580    uint8_t *buffer;
1581    uint32_t length;
1582    QEMUSizedBuffer *qsb;
1583
1584    length = qemu_get_be32(mis->from_src_file);
1585    trace_loadvm_handle_cmd_packaged(length);
1586
1587    if (length > MAX_VM_CMD_PACKAGED_SIZE) {
1588        error_report("Unreasonably large packaged state: %u", length);
1589        return -1;
1590    }
1591    buffer = g_malloc0(length);
1592    ret = qemu_get_buffer(mis->from_src_file, buffer, (int)length);
1593    if (ret != length) {
1594        g_free(buffer);
1595        error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%d",
1596                     ret, length);
1597        return (ret < 0) ? ret : -EAGAIN;
1598    }
1599    trace_loadvm_handle_cmd_packaged_received(ret);
1600
 1601    /* Set up a dummy QEMUFile that actually reads from the buffer */
1602    qsb = qsb_create(buffer, length);
1603    g_free(buffer); /* Because qsb_create copies */
1604    if (!qsb) {
 1605        error_report("Unable to create qsb");
             return -ENOMEM;
 1606    }
1607    QEMUFile *packf = qemu_bufopen("r", qsb);
1608
1609    ret = qemu_loadvm_state_main(packf, mis);
1610    trace_loadvm_handle_cmd_packaged_main(ret);
1611    qemu_fclose(packf);
1612    qsb_free(qsb);
1613
1614    return ret;
1615}
1616
1617/*
 1618 * Process an incoming 'QEMU_VM_COMMAND'.  Returns:
1619 * 0           just a normal return
1620 * LOADVM_QUIT All good, but exit the loop
1621 * <0          Error
1622 */
1623static int loadvm_process_command(QEMUFile *f)
1624{
1625    MigrationIncomingState *mis = migration_incoming_get_current();
1626    uint16_t cmd;
1627    uint16_t len;
1628    uint32_t tmp32;
1629
1630    cmd = qemu_get_be16(f);
1631    len = qemu_get_be16(f);
1632
1633    trace_loadvm_process_command(cmd, len);
1634    if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
1635        error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
1636        return -EINVAL;
1637    }
1638
1639    if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
1640        error_report("%s received with bad length - expecting %zu, got %d",
1641                     mig_cmd_args[cmd].name,
1642                     (size_t)mig_cmd_args[cmd].len, len);
1643        return -ERANGE;
1644    }
1645
1646    switch (cmd) {
1647    case MIG_CMD_OPEN_RETURN_PATH:
1648        if (mis->to_src_file) {
1649            error_report("CMD_OPEN_RETURN_PATH called when RP already open");
1650            /* Not really a problem, so don't give up */
1651            return 0;
1652        }
1653        mis->to_src_file = qemu_file_get_return_path(f);
1654        if (!mis->to_src_file) {
1655            error_report("CMD_OPEN_RETURN_PATH failed");
1656            return -1;
1657        }
1658        break;
1659
1660    case MIG_CMD_PING:
1661        tmp32 = qemu_get_be32(f);
1662        trace_loadvm_process_command_ping(tmp32);
1663        if (!mis->to_src_file) {
1664            error_report("CMD_PING (0x%x) received with no return path",
1665                         tmp32);
1666            return -1;
1667        }
1668        migrate_send_rp_pong(mis, tmp32);
1669        break;
1670
1671    case MIG_CMD_PACKAGED:
1672        return loadvm_handle_cmd_packaged(mis);
1673
1674    case MIG_CMD_POSTCOPY_ADVISE:
1675        return loadvm_postcopy_handle_advise(mis);
1676
1677    case MIG_CMD_POSTCOPY_LISTEN:
1678        return loadvm_postcopy_handle_listen(mis);
1679
1680    case MIG_CMD_POSTCOPY_RUN:
1681        return loadvm_postcopy_handle_run(mis);
1682
1683    case MIG_CMD_POSTCOPY_RAM_DISCARD:
1684        return loadvm_postcopy_ram_handle_discard(mis, len);
1685    }
1686
1687    return 0;
1688}
1689
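    /*
     * One LoadStateEntry is recorded for each SECTION_START/FULL header we
     * accept, so that later SECTION_PART/END chunks (which carry only a
     * section id) can be routed back to the same device at the same version.
     */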
1690struct LoadStateEntry {
1691    QLIST_ENTRY(LoadStateEntry) entry;
1692    SaveStateEntry *se;
1693    int section_id;
1694    int version_id;
1695};
1696
1697/*
1698 * Read a footer off the wire and check that it matches the expected section
1699 *
1700 * Returns: true if the footer was good
1701 *          false if there is a problem (and calls error_report to say why)
1702 */
1703static bool check_section_footer(QEMUFile *f, LoadStateEntry *le)
1704{
1705    uint8_t read_mark;
1706    uint32_t read_section_id;
1707
1708    if (skip_section_footers) {
1709        /* No footer to check */
1710        return true;
1711    }
1712
1713    read_mark = qemu_get_byte(f);
1714
1715    if (read_mark != QEMU_VM_SECTION_FOOTER) {
1716        error_report("Missing section footer for %s", le->se->idstr);
1717        return false;
1718    }
1719
1720    read_section_id = qemu_get_be32(f);
1721    if (read_section_id != le->section_id) {
1722        error_report("Mismatched section id in footer for %s -"
1723                     " read 0x%x expected 0x%x",
1724                     le->se->idstr, read_section_id, le->section_id);
1725        return false;
1726    }
1727
1728    /* All good */
1729    return true;
1730}
1731
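    /* Release the LoadStateEntry list built up while loading sections */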
1732void loadvm_free_handlers(MigrationIncomingState *mis)
1733{
1734    LoadStateEntry *le, *new_le;
1735
1736    QLIST_FOREACH_SAFE(le, &mis->loadvm_handlers, entry, new_le) {
1737        QLIST_REMOVE(le, entry);
1738        g_free(le);
1739    }
1740}
1741
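    /*
     * Handle QEMU_VM_SECTION_START/FULL: read the section header (section id,
     * id string, instance id, version), find the matching SaveStateEntry,
     * record a LoadStateEntry for later PART/END chunks, then load the device
     * state and check the section footer.
     */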
1742static int
1743qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
1744{
1745    uint32_t instance_id, version_id, section_id;
1746    SaveStateEntry *se;
1747    LoadStateEntry *le;
1748    char idstr[256];
1749    int ret;
1750
1751    /* Read section start */
1752    section_id = qemu_get_be32(f);
1753    if (!qemu_get_counted_string(f, idstr)) {
1754        error_report("Unable to read ID string for section %u",
1755                     section_id);
1756        return -EINVAL;
1757    }
1758    instance_id = qemu_get_be32(f);
1759    version_id = qemu_get_be32(f);
1760
1761    trace_qemu_loadvm_state_section_startfull(section_id, idstr,
1762            instance_id, version_id);
1763    /* Find savevm section */
1764    se = find_se(idstr, instance_id);
1765    if (se == NULL) {
1766        error_report("Unknown savevm section or instance '%s' %u",
1767                     idstr, instance_id);
1768        return -EINVAL;
1769    }
1770
1771    /* Validate version */
1772    if (version_id > se->version_id) {
1773        error_report("savevm: unsupported version %u for '%s' v%d",
1774                     version_id, idstr, se->version_id);
1775        return -EINVAL;
1776    }
1777
1778    /* Add entry */
1779    le = g_malloc0(sizeof(*le));
1780
1781    le->se = se;
1782    le->section_id = section_id;
1783    le->version_id = version_id;
1784    QLIST_INSERT_HEAD(&mis->loadvm_handlers, le, entry);
1785
1786    ret = vmstate_load(f, le->se, le->version_id);
1787    if (ret < 0) {
1788        error_report("error while loading state for instance 0x%x of"
1789                     " device '%s'", instance_id, idstr);
1790        return ret;
1791    }
1792    if (!check_section_footer(f, le)) {
1793        return -EINVAL;
1794    }
1795
1796    return 0;
1797}
1798
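    /*
     * Handle QEMU_VM_SECTION_PART/END: look up the LoadStateEntry registered
     * by the earlier SECTION_START and load the next chunk of its state.
     */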
1799static int
1800qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
1801{
1802    uint32_t section_id;
1803    LoadStateEntry *le;
1804    int ret;
1805
1806    section_id = qemu_get_be32(f);
1807
1808    trace_qemu_loadvm_state_section_partend(section_id);
1809    QLIST_FOREACH(le, &mis->loadvm_handlers, entry) {
1810        if (le->section_id == section_id) {
1811            break;
1812        }
1813    }
1814    if (le == NULL) {
1815        error_report("Unknown savevm section %u", section_id);
1816        return -EINVAL;
1817    }
1818
1819    ret = vmstate_load(f, le->se, le->version_id);
1820    if (ret < 0) {
1821        error_report("error while loading state section id %d(%s)",
1822                     section_id, le->se->idstr);
1823        return ret;
1824    }
1825    if (!check_section_footer(f, le)) {
1826        return -EINVAL;
1827    }
1828
1829    return 0;
1830}
1831
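    /*
     * Main dispatch loop: read section-type bytes until QEMU_VM_EOF, handing
     * each section or command off to the helpers above.  This also runs
     * recursively on the buffer delivered by MIG_CMD_PACKAGED.
     */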
1832static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
1833{
1834    uint8_t section_type;
1835    int ret;
1836
1837    while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
1838
1839        trace_qemu_loadvm_state_section(section_type);
1840        switch (section_type) {
1841        case QEMU_VM_SECTION_START:
1842        case QEMU_VM_SECTION_FULL:
1843            ret = qemu_loadvm_section_start_full(f, mis);
1844            if (ret < 0) {
1845                return ret;
1846            }
1847            break;
1848        case QEMU_VM_SECTION_PART:
1849        case QEMU_VM_SECTION_END:
1850            ret = qemu_loadvm_section_part_end(f, mis);
1851            if (ret < 0) {
1852                return ret;
1853            }
1854            break;
1855        case QEMU_VM_COMMAND:
1856            ret = loadvm_process_command(f);
1857            trace_qemu_loadvm_state_section_command(ret);
1858            if ((ret < 0) || (ret & LOADVM_QUIT)) {
1859                return ret;
1860            }
1861            break;
1862        default:
1863            error_report("Unknown savevm section type %d", section_type);
1864            return -EINVAL;
1865        }
1866    }
1867
1868    return 0;
1869}
1870
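    /*
     * Top-level entry point for an incoming stream: check the file magic and
     * version, load the optional configuration section, run the main section
     * loop, then drain any trailing VMDESC data before syncing CPU state.
     */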
1871int qemu_loadvm_state(QEMUFile *f)
1872{
1873    MigrationIncomingState *mis = migration_incoming_get_current();
1874    Error *local_err = NULL;
1875    unsigned int v;
1876    int ret;
1877
1878    if (qemu_savevm_state_blocked(&local_err)) {
1879        error_report_err(local_err);
1880        return -EINVAL;
1881    }
1882
1883    v = qemu_get_be32(f);
1884    if (v != QEMU_VM_FILE_MAGIC) {
1885        error_report("Not a migration stream");
1886        return -EINVAL;
1887    }
1888
1889    v = qemu_get_be32(f);
1890    if (v == QEMU_VM_FILE_VERSION_COMPAT) {
1891        error_report("SaveVM v2 format is obsolete and doesn't work anymore");
1892        return -ENOTSUP;
1893    }
1894    if (v != QEMU_VM_FILE_VERSION) {
1895        error_report("Unsupported migration stream version");
1896        return -ENOTSUP;
1897    }
1898
1899    if (!savevm_state.skip_configuration || enforce_config_section()) {
1900        if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
1901            error_report("Configuration section missing");
1902            return -EINVAL;
1903        }
1904        ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
1905
1906        if (ret) {
1907            return ret;
1908        }
1909    }
1910
1911    ret = qemu_loadvm_state_main(f, mis);
1912    qemu_event_set(&mis->main_thread_load_event);
1913
1914    trace_qemu_loadvm_state_post_main(ret);
1915
1916    if (mis->have_listen_thread) {
1917        /* Listen thread still going, can't clean up yet */
1918        return ret;
1919    }
1920
1921    if (ret == 0) {
1922        ret = qemu_file_get_error(f);
1923    }
1924
1925    /*
1926     * Try to read in the VMDESC section as well, so that dumping tools that
1927     * intercept our migration stream have the chance to see it.
1928     */
1929
1930    /* We've got to be careful; if we don't read the data and just shut the fd
1931     * then the sender can error if we close while it's still sending.
1932     * We also mustn't read data that isn't there; some transports (RDMA)
1933     * will stall waiting for that data when the source has already closed.
1934     */
1935    if (ret == 0 && should_send_vmdesc()) {
1936        uint8_t *buf;
1937        uint32_t size;
1938        uint8_t  section_type = qemu_get_byte(f);
1939
1940        if (section_type != QEMU_VM_VMDESCRIPTION) {
1941            error_report("Expected vmdescription section, but got %d",
1942                         section_type);
1943            /*
1944             * It doesn't seem worth failing at this point since
1945             * we apparently have an otherwise valid VM state
1946             */
1947        } else {
1948            buf = g_malloc(0x1000);
1949            size = qemu_get_be32(f);
1950
1951            while (size > 0) {
1952                uint32_t read_chunk = MIN(size, 0x1000);
1953                qemu_get_buffer(f, buf, read_chunk);
1954                size -= read_chunk;
1955            }
1956            g_free(buf);
1957        }
1958    }
1959
1960    cpu_synchronize_all_post_init();
1961
1962    return ret;
1963}
1964
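    /*
     * HMP "savevm": delete any existing snapshot of the same name, stop the
     * VM, write the VM state through a QEMUFile backed by the vmstate block
     * device, then create an internal snapshot on every snapshottable device,
     * restarting the VM afterwards if it was running.
     */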
1965void hmp_savevm(Monitor *mon, const QDict *qdict)
1966{
1967    BlockDriverState *bs, *bs1;
1968    QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
1969    int ret;
1970    QEMUFile *f;
1971    int saved_vm_running;
1972    uint64_t vm_state_size;
1973    qemu_timeval tv;
1974    struct tm tm;
1975    const char *name = qdict_get_try_str(qdict, "name");
1976    Error *local_err = NULL;
1977    AioContext *aio_context;
1978
1979    if (!bdrv_all_can_snapshot(&bs)) {
1980        monitor_printf(mon, "Device '%s' is writable but does not "
1981                       "support snapshots.\n", bdrv_get_device_name(bs));
1982        return;
1983    }
1984
1985    /* Delete old snapshots of the same name */
1986    if (name && bdrv_all_delete_snapshot(name, &bs1, &local_err) < 0) {
1987        error_reportf_err(local_err,
1988                          "Error while deleting snapshot on device '%s': ",
1989                          bdrv_get_device_name(bs1));
1990        return;
1991    }
1992
1993    bs = bdrv_all_find_vmstate_bs();
1994    if (bs == NULL) {
1995        monitor_printf(mon, "No block device can accept snapshots\n");
1996        return;
1997    }
1998    aio_context = bdrv_get_aio_context(bs);
1999
2000    saved_vm_running = runstate_is_running();
2001
2002    ret = global_state_store();
2003    if (ret) {
2004        monitor_printf(mon, "Error saving global state\n");
2005        return;
2006    }
2007    vm_stop(RUN_STATE_SAVE_VM);
2008
2009    aio_context_acquire(aio_context);
2010
2011    memset(sn, 0, sizeof(*sn));
2012
2013    /* fill auxiliary fields */
2014    qemu_gettimeofday(&tv);
2015    sn->date_sec = tv.tv_sec;
2016    sn->date_nsec = tv.tv_usec * 1000;
2017    sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
2018
2019    if (name) {
2020        ret = bdrv_snapshot_find(bs, old_sn, name);
2021        if (ret >= 0) {
2022            pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
2023            pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
2024        } else {
2025            pstrcpy(sn->name, sizeof(sn->name), name);
2026        }
2027    } else {
2028        /* cast below needed for OpenBSD where tv_sec is still 'long' */
2029        localtime_r((const time_t *)&tv.tv_sec, &tm);
2030        strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
2031    }
2032
2033    /* save the VM state */
2034    f = qemu_fopen_bdrv(bs, 1);
2035    if (!f) {
2036        monitor_printf(mon, "Could not open VM state file\n");
2037        goto the_end;
2038    }
2039    ret = qemu_savevm_state(f, &local_err);
2040    vm_state_size = qemu_ftell(f);
2041    qemu_fclose(f);
2042    if (ret < 0) {
2043        error_report_err(local_err);
2044        goto the_end;
2045    }
2046
2047    ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
2048    if (ret < 0) {
2049        monitor_printf(mon, "Error while creating snapshot on '%s'\n",
2050                       bdrv_get_device_name(bs));
2051    }
2052
2053 the_end:
2054    aio_context_release(aio_context);
2055    if (saved_vm_running) {
2056        vm_start();
2057    }
2058}
2059
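    /*
     * QMP "xen-save-devices-state": stop the VM, write the device state to
     * the given file via qemu_save_device_state(), and restart the VM if it
     * was previously running.
     */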
2060void qmp_xen_save_devices_state(const char *filename, Error **errp)
2061{
2062    QEMUFile *f;
2063    int saved_vm_running;
2064    int ret;
2065
2066    saved_vm_running = runstate_is_running();
2067    vm_stop(RUN_STATE_SAVE_VM);
2068    global_state_store_running();
2069
2070    f = qemu_fopen(filename, "wb");
2071    if (!f) {
2072        error_setg_file_open(errp, errno, filename);
2073        goto the_end;
2074    }
2075    ret = qemu_save_device_state(f);
2076    qemu_fclose(f);
2077    if (ret < 0) {
2078        error_setg(errp, QERR_IO_ERROR);
2079    }
2080
2081 the_end:
2082    if (saved_vm_running) {
2083        vm_start();
2084    }
2085}
2086
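    /*
     * Revert to the named internal snapshot: check that it exists on every
     * snapshottable device, switch all devices to it, then reset the machine
     * and load the saved VM state from the vmstate block device.
     */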
2087int load_vmstate(const char *name)
2088{
2089    BlockDriverState *bs, *bs_vm_state;
2090    QEMUSnapshotInfo sn;
2091    QEMUFile *f;
2092    int ret;
2093    AioContext *aio_context;
2094
2095    if (!bdrv_all_can_snapshot(&bs)) {
2096        error_report("Device '%s' is writable but does not support snapshots.",
2097                     bdrv_get_device_name(bs));
2098        return -ENOTSUP;
2099    }
2100    ret = bdrv_all_find_snapshot(name, &bs);
2101    if (ret < 0) {
2102        error_report("Device '%s' does not have the requested snapshot '%s'",
2103                     bdrv_get_device_name(bs), name);
2104        return ret;
2105    }
2106
2107    bs_vm_state = bdrv_all_find_vmstate_bs();
2108    if (!bs_vm_state) {
2109        error_report("No block device supports snapshots");
2110        return -ENOTSUP;
2111    }
2112    aio_context = bdrv_get_aio_context(bs_vm_state);
2113
2114    /* Don't even try to load empty VM states */
2115    aio_context_acquire(aio_context);
2116    ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
2117    aio_context_release(aio_context);
2118    if (ret < 0) {
2119        return ret;
2120    } else if (sn.vm_state_size == 0) {
2121        error_report("This is a disk-only snapshot. Revert to it offline "
2122            "using qemu-img.");
2123        return -EINVAL;
2124    }
2125
2126    /* Flush all IO requests so they don't interfere with the new state.  */
2127    bdrv_drain_all();
2128
2129    ret = bdrv_all_goto_snapshot(name, &bs);
2130    if (ret < 0) {
2131        error_report("Error %d while activating snapshot '%s' on '%s'",
2132                     ret, name, bdrv_get_device_name(bs));
2133        return ret;
2134    }
2135
2136    /* restore the VM state */
2137    f = qemu_fopen_bdrv(bs_vm_state, 0);
2138    if (!f) {
2139        error_report("Could not open VM state file");
2140        return -EINVAL;
2141    }
2142
2143    qemu_system_reset(VMRESET_SILENT);
2144    migration_incoming_state_new(f);
2145
2146    aio_context_acquire(aio_context);
2147    ret = qemu_loadvm_state(f);
2148    qemu_fclose(f);
2149    aio_context_release(aio_context);
2150
2151    migration_incoming_state_destroy();
2152    if (ret < 0) {
2153        error_report("Error %d while loading VM state", ret);
2154        return ret;
2155    }
2156
2157    return 0;
2158}
2159
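    /* HMP "delvm": delete the named snapshot from all block devices */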
2160void hmp_delvm(Monitor *mon, const QDict *qdict)
2161{
2162    BlockDriverState *bs;
2163    Error *err = NULL;
2164    const char *name = qdict_get_str(qdict, "name");
2165
2166    if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) {
2167        error_reportf_err(err,
2168                          "Error while deleting snapshot on device '%s': ",
2169                          bdrv_get_device_name(bs));
2170    }
2171}
2172
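    /*
     * HMP "info snapshots": list the snapshots on the vmstate block device,
     * showing only those that are present on every snapshottable device.
     */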
2173void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
2174{
2175    BlockDriverState *bs, *bs1;
2176    QEMUSnapshotInfo *sn_tab, *sn;
2177    int nb_sns, i;
2178    int total;
2179    int *available_snapshots;
2180    AioContext *aio_context;
2181
2182    bs = bdrv_all_find_vmstate_bs();
2183    if (!bs) {
2184        monitor_printf(mon, "No available block device supports snapshots\n");
2185        return;
2186    }
2187    aio_context = bdrv_get_aio_context(bs);
2188
2189    aio_context_acquire(aio_context);
2190    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2191    aio_context_release(aio_context);
2192
2193    if (nb_sns < 0) {
2194        monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns);
2195        return;
2196    }
2197
2198    if (nb_sns == 0) {
2199        monitor_printf(mon, "There is no snapshot available.\n");
2200        return;
2201    }
2202
2203    available_snapshots = g_new0(int, nb_sns);
2204    total = 0;
2205    for (i = 0; i < nb_sns; i++) {
2206        if (bdrv_all_find_snapshot(sn_tab[i].id_str, &bs1) == 0) {
2207            available_snapshots[total] = i;
2208            total++;
2209        }
2210    }
2211
2212    if (total > 0) {
2213        bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, NULL);
2214        monitor_printf(mon, "\n");
2215        for (i = 0; i < total; i++) {
2216            sn = &sn_tab[available_snapshots[i]];
2217            bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, sn);
2218            monitor_printf(mon, "\n");
2219        }
2220    } else {
2221        monitor_printf(mon, "There is no suitable snapshot available.\n");
2222    }
2223
2224    g_free(sn_tab);
2225    g_free(available_snapshots);
2227}
2228
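    /*
     * Tag a RAM-backed MemoryRegion with an id string derived from the region
     * name and owning device, so that the migration code can match RAM blocks
     * by name between source and destination.
     */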
2229void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
2230{
2231    qemu_ram_set_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK,
2232                       memory_region_name(mr), dev);
2233}
2234
2235void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
2236{
2237    qemu_ram_unset_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK);
2238}
2239
2240void vmstate_register_ram_global(MemoryRegion *mr)
2241{
2242    vmstate_register_ram(mr, NULL);
2243}
2244