qemu/migration/vmstate.c
<<
>>
Prefs
   1/*
   2 * VMState interpreter
   3 *
   4 * Copyright (c) 2009-2017 Red Hat Inc
   5 *
   6 * Authors:
   7 *  Juan Quintela <quintela@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "migration.h"
  15#include "migration/vmstate.h"
  16#include "savevm.h"
  17#include "qapi/error.h"
  18#include "qobject/json-writer.h"
  19#include "qemu-file.h"
  20#include "qemu/bitops.h"
  21#include "qemu/error-report.h"
  22#include "trace.h"
  23
  24static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
  25                                   void *opaque, JSONWriter *vmdesc,
  26                                   Error **errp);
  27static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
  28                                   void *opaque);
  29
  30/* Whether this field should exist for either save or load the VM? */
  31static bool
  32vmstate_field_exists(const VMStateDescription *vmsd, const VMStateField *field,
  33                     void *opaque, int version_id)
  34{
  35    bool result;
  36
  37    if (field->field_exists) {
  38        /* If there's the function checker, that's the solo truth */
  39        result = field->field_exists(opaque, version_id);
  40        trace_vmstate_field_exists(vmsd->name, field->name, field->version_id,
  41                                   version_id, result);
  42    } else {
  43        /*
  44         * Otherwise, we only save/load if field version is same or older.
  45         * For example, when loading from an old binary with old version,
  46         * we ignore new fields with newer version_ids.
  47         */
  48        result = field->version_id <= version_id;
  49    }
  50
  51    return result;
  52}
  53
  54/*
  55 * Create a fake nullptr field when there's a NULL pointer detected in the
  56 * array of a VMS_ARRAY_OF_POINTER VMSD field.  It's needed because we
  57 * can't dereference the NULL pointer.
  58 */
  59static const VMStateField *
  60vmsd_create_fake_nullptr_field(const VMStateField *field)
  61{
  62    VMStateField *fake = g_new0(VMStateField, 1);
  63
  64    /* It can only happen on an array of pointers! */
  65    assert(field->flags & VMS_ARRAY_OF_POINTER);
  66
  67    /* Some of fake's properties should match the original's */
  68    fake->name = field->name;
  69    fake->version_id = field->version_id;
  70
  71    /* Do not need "field_exists" check as it always exists (which is null) */
  72    fake->field_exists = NULL;
  73
  74    /* See vmstate_info_nullptr - use 1 byte to represent nullptr */
  75    fake->size = 1;
  76    fake->info = &vmstate_info_nullptr;
  77    fake->flags = VMS_SINGLE;
  78
  79    /* All the rest fields shouldn't matter.. */
  80
  81    return (const VMStateField *)fake;
  82}
  83
  84static int vmstate_n_elems(void *opaque, const VMStateField *field)
  85{
  86    int n_elems = 1;
  87
  88    if (field->flags & VMS_ARRAY) {
  89        n_elems = field->num;
  90    } else if (field->flags & VMS_VARRAY_INT32) {
  91        n_elems = *(int32_t *)(opaque + field->num_offset);
  92    } else if (field->flags & VMS_VARRAY_UINT32) {
  93        n_elems = *(uint32_t *)(opaque + field->num_offset);
  94    } else if (field->flags & VMS_VARRAY_UINT16) {
  95        n_elems = *(uint16_t *)(opaque + field->num_offset);
  96    } else if (field->flags & VMS_VARRAY_UINT8) {
  97        n_elems = *(uint8_t *)(opaque + field->num_offset);
  98    }
  99
 100    if (field->flags & VMS_MULTIPLY_ELEMENTS) {
 101        n_elems *= field->num;
 102    }
 103
 104    trace_vmstate_n_elems(field->name, n_elems);
 105    return n_elems;
 106}
 107
 108static int vmstate_size(void *opaque, const VMStateField *field)
 109{
 110    int size = field->size;
 111
 112    if (field->flags & VMS_VBUFFER) {
 113        size = *(int32_t *)(opaque + field->size_offset);
 114        if (field->flags & VMS_MULTIPLY) {
 115            size *= field->size;
 116        }
 117    }
 118
 119    return size;
 120}
 121
 122static void vmstate_handle_alloc(void *ptr, const VMStateField *field,
 123                                 void *opaque)
 124{
 125    if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) {
 126        gsize size = vmstate_size(opaque, field);
 127        size *= vmstate_n_elems(opaque, field);
 128        if (size) {
 129            *(void **)ptr = g_malloc(size);
 130        }
 131    }
 132}
 133
 134int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
 135                       void *opaque, int version_id)
 136{
 137    const VMStateField *field = vmsd->fields;
 138    int ret = 0;
 139
 140    trace_vmstate_load_state(vmsd->name, version_id);
 141    if (version_id > vmsd->version_id) {
 142        error_report("%s: incoming version_id %d is too new "
 143                     "for local version_id %d",
 144                     vmsd->name, version_id, vmsd->version_id);
 145        trace_vmstate_load_state_end(vmsd->name, "too new", -EINVAL);
 146        return -EINVAL;
 147    }
 148    if  (version_id < vmsd->minimum_version_id) {
 149        error_report("%s: incoming version_id %d is too old "
 150                     "for local minimum version_id  %d",
 151                     vmsd->name, version_id, vmsd->minimum_version_id);
 152        trace_vmstate_load_state_end(vmsd->name, "too old", -EINVAL);
 153        return -EINVAL;
 154    }
 155    if (vmsd->pre_load) {
 156        ret = vmsd->pre_load(opaque);
 157        if (ret) {
 158            return ret;
 159        }
 160    }
 161    while (field->name) {
 162        bool exists = vmstate_field_exists(vmsd, field, opaque, version_id);
 163        trace_vmstate_load_state_field(vmsd->name, field->name, exists);
 164        if (exists) {
 165            void *first_elem = opaque + field->offset;
 166            int i, n_elems = vmstate_n_elems(opaque, field);
 167            int size = vmstate_size(opaque, field);
 168
 169            vmstate_handle_alloc(first_elem, field, opaque);
 170            if (field->flags & VMS_POINTER) {
 171                first_elem = *(void **)first_elem;
 172                assert(first_elem || !n_elems || !size);
 173            }
 174            for (i = 0; i < n_elems; i++) {
 175                void *curr_elem = first_elem + size * i;
 176                const VMStateField *inner_field;
 177
 178                if (field->flags & VMS_ARRAY_OF_POINTER) {
 179                    curr_elem = *(void **)curr_elem;
 180                }
 181
 182                if (!curr_elem && size) {
 183                    /*
 184                     * If null pointer found (which should only happen in
 185                     * an array of pointers), use null placeholder and do
 186                     * not follow.
 187                     */
 188                    inner_field = vmsd_create_fake_nullptr_field(field);
 189                } else {
 190                    inner_field = field;
 191                }
 192
 193                if (inner_field->flags & VMS_STRUCT) {
 194                    ret = vmstate_load_state(f, inner_field->vmsd, curr_elem,
 195                                             inner_field->vmsd->version_id);
 196                } else if (inner_field->flags & VMS_VSTRUCT) {
 197                    ret = vmstate_load_state(f, inner_field->vmsd, curr_elem,
 198                                             inner_field->struct_version_id);
 199                } else {
 200                    ret = inner_field->info->get(f, curr_elem, size,
 201                                                 inner_field);
 202                }
 203
 204                /* If we used a fake temp field.. free it now */
 205                if (inner_field != field) {
 206                    g_clear_pointer((gpointer *)&inner_field, g_free);
 207                }
 208
 209                if (ret >= 0) {
 210                    ret = qemu_file_get_error(f);
 211                }
 212                if (ret < 0) {
 213                    qemu_file_set_error(f, ret);
 214                    error_report("Failed to load %s:%s", vmsd->name,
 215                                 field->name);
 216                    trace_vmstate_load_field_error(field->name, ret);
 217                    return ret;
 218                }
 219            }
 220        } else if (field->flags & VMS_MUST_EXIST) {
 221            error_report("Input validation failed: %s/%s",
 222                         vmsd->name, field->name);
 223            return -1;
 224        }
 225        field++;
 226    }
 227    assert(field->flags == VMS_END);
 228    ret = vmstate_subsection_load(f, vmsd, opaque);
 229    if (ret != 0) {
 230        qemu_file_set_error(f, ret);
 231        return ret;
 232    }
 233    if (vmsd->post_load) {
 234        ret = vmsd->post_load(opaque, version_id);
 235    }
 236    trace_vmstate_load_state_end(vmsd->name, "end", ret);
 237    return ret;
 238}
 239
 240static int vmfield_name_num(const VMStateField *start,
 241                            const VMStateField *search)
 242{
 243    const VMStateField *field;
 244    int found = 0;
 245
 246    for (field = start; field->name; field++) {
 247        if (!strcmp(field->name, search->name)) {
 248            if (field == search) {
 249                return found;
 250            }
 251            found++;
 252        }
 253    }
 254
 255    return -1;
 256}
 257
 258static bool vmfield_name_is_unique(const VMStateField *start,
 259                                   const VMStateField *search)
 260{
 261    const VMStateField *field;
 262    int found = 0;
 263
 264    for (field = start; field->name; field++) {
 265        if (!strcmp(field->name, search->name)) {
 266            found++;
 267            /* name found more than once, so it's not unique */
 268            if (found > 1) {
 269                return false;
 270            }
 271        }
 272    }
 273
 274    return true;
 275}
 276
 277static const char *vmfield_get_type_name(const VMStateField *field)
 278{
 279    const char *type = "unknown";
 280
 281    if (field->flags & VMS_STRUCT) {
 282        type = "struct";
 283    } else if (field->flags & VMS_VSTRUCT) {
 284        type = "vstruct";
 285    } else if (field->info->name) {
 286        type = field->info->name;
 287    }
 288
 289    return type;
 290}
 291
 292static bool vmsd_can_compress(const VMStateField *field)
 293{
 294    if (field->field_exists) {
 295        /* Dynamically existing fields mess up compression */
 296        return false;
 297    }
 298
 299    if (field->flags & VMS_STRUCT) {
 300        const VMStateField *sfield = field->vmsd->fields;
 301        while (sfield->name) {
 302            if (!vmsd_can_compress(sfield)) {
 303                /* Child elements can't compress, so can't we */
 304                return false;
 305            }
 306            sfield++;
 307        }
 308
 309        if (field->vmsd->subsections) {
 310            /* Subsections may come and go, better don't compress */
 311            return false;
 312        }
 313    }
 314
 315    return true;
 316}
 317
 318static void vmsd_desc_field_start(const VMStateDescription *vmsd,
 319                                  JSONWriter *vmdesc,
 320                                  const VMStateField *field, int i, int max)
 321{
 322    char *name, *old_name;
 323    bool is_array = max > 1;
 324    bool can_compress = vmsd_can_compress(field);
 325
 326    if (!vmdesc) {
 327        return;
 328    }
 329
 330    name = g_strdup(field->name);
 331
 332    /* Field name is not unique, need to make it unique */
 333    if (!vmfield_name_is_unique(vmsd->fields, field)) {
 334        int num = vmfield_name_num(vmsd->fields, field);
 335        old_name = name;
 336        name = g_strdup_printf("%s[%d]", name, num);
 337        g_free(old_name);
 338    }
 339
 340    json_writer_start_object(vmdesc, NULL);
 341    json_writer_str(vmdesc, "name", name);
 342    if (is_array) {
 343        if (can_compress) {
 344            json_writer_int64(vmdesc, "array_len", max);
 345        } else {
 346            json_writer_int64(vmdesc, "index", i);
 347        }
 348    }
 349    json_writer_str(vmdesc, "type", vmfield_get_type_name(field));
 350
 351    if (field->flags & VMS_STRUCT) {
 352        json_writer_start_object(vmdesc, "struct");
 353    }
 354
 355    g_free(name);
 356}
 357
 358static void vmsd_desc_field_end(const VMStateDescription *vmsd,
 359                                JSONWriter *vmdesc,
 360                                const VMStateField *field, size_t size)
 361{
 362    if (!vmdesc) {
 363        return;
 364    }
 365
 366    if (field->flags & VMS_STRUCT) {
 367        /* We printed a struct in between, close its child object */
 368        json_writer_end_object(vmdesc);
 369    }
 370
 371    json_writer_int64(vmdesc, "size", size);
 372    json_writer_end_object(vmdesc);
 373}
 374
 375
 376bool vmstate_section_needed(const VMStateDescription *vmsd, void *opaque)
 377{
 378    if (vmsd->needed && !vmsd->needed(opaque)) {
 379        /* optional section not needed */
 380        return false;
 381    }
 382    return true;
 383}
 384
 385
 386int vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
 387                       void *opaque, JSONWriter *vmdesc_id)
 388{
 389    return vmstate_save_state_v(f, vmsd, opaque, vmdesc_id, vmsd->version_id, NULL);
 390}
 391
 392int vmstate_save_state_with_err(QEMUFile *f, const VMStateDescription *vmsd,
 393                       void *opaque, JSONWriter *vmdesc_id, Error **errp)
 394{
 395    return vmstate_save_state_v(f, vmsd, opaque, vmdesc_id, vmsd->version_id, errp);
 396}
 397
 398int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
 399                         void *opaque, JSONWriter *vmdesc, int version_id, Error **errp)
 400{
 401    int ret = 0;
 402    const VMStateField *field = vmsd->fields;
 403
 404    trace_vmstate_save_state_top(vmsd->name);
 405
 406    if (vmsd->pre_save) {
 407        ret = vmsd->pre_save(opaque);
 408        trace_vmstate_save_state_pre_save_res(vmsd->name, ret);
 409        if (ret) {
 410            error_setg(errp, "pre-save failed: %s", vmsd->name);
 411            return ret;
 412        }
 413    }
 414
 415    if (vmdesc) {
 416        json_writer_str(vmdesc, "vmsd_name", vmsd->name);
 417        json_writer_int64(vmdesc, "version", version_id);
 418        json_writer_start_array(vmdesc, "fields");
 419    }
 420
 421    while (field->name) {
 422        if (vmstate_field_exists(vmsd, field, opaque, version_id)) {
 423            void *first_elem = opaque + field->offset;
 424            int i, n_elems = vmstate_n_elems(opaque, field);
 425            int size = vmstate_size(opaque, field);
 426            uint64_t old_offset, written_bytes;
 427            JSONWriter *vmdesc_loop = vmdesc;
 428            bool is_prev_null = false;
 429
 430            trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems);
 431            if (field->flags & VMS_POINTER) {
 432                first_elem = *(void **)first_elem;
 433                assert(first_elem || !n_elems || !size);
 434            }
 435
 436            for (i = 0; i < n_elems; i++) {
 437                void *curr_elem = first_elem + size * i;
 438                const VMStateField *inner_field;
 439                bool is_null;
 440                int max_elems = n_elems - i;
 441
 442                old_offset = qemu_file_transferred(f);
 443                if (field->flags & VMS_ARRAY_OF_POINTER) {
 444                    assert(curr_elem);
 445                    curr_elem = *(void **)curr_elem;
 446                }
 447
 448                if (!curr_elem && size) {
 449                    /*
 450                     * If null pointer found (which should only happen in
 451                     * an array of pointers), use null placeholder and do
 452                     * not follow.
 453                     */
 454                    inner_field = vmsd_create_fake_nullptr_field(field);
 455                    is_null = true;
 456                } else {
 457                    inner_field = field;
 458                    is_null = false;
 459                }
 460
 461                /*
 462                 * This logic only matters when dumping VM Desc.
 463                 *
 464                 * Due to the fake nullptr handling above, if there's mixed
 465                 * null/non-null data, it doesn't make sense to emit a
 466                 * compressed array representation spanning the entire array
 467                 * because the field types will be different (e.g. struct
 468                 * vs. nullptr). Search ahead for the next null/non-null element
 469                 * and start a new compressed array if found.
 470                 */
 471                if (vmdesc && (field->flags & VMS_ARRAY_OF_POINTER) &&
 472                    is_null != is_prev_null) {
 473
 474                    is_prev_null = is_null;
 475                    vmdesc_loop = vmdesc;
 476
 477                    for (int j = i + 1; j < n_elems; j++) {
 478                        void *elem = *(void **)(first_elem + size * j);
 479                        bool elem_is_null = !elem && size;
 480
 481                        if (is_null != elem_is_null) {
 482                            max_elems = j - i;
 483                            break;
 484                        }
 485                    }
 486                }
 487
 488                vmsd_desc_field_start(vmsd, vmdesc_loop, inner_field,
 489                                      i, max_elems);
 490
 491                if (inner_field->flags & VMS_STRUCT) {
 492                    ret = vmstate_save_state(f, inner_field->vmsd,
 493                                             curr_elem, vmdesc_loop);
 494                } else if (inner_field->flags & VMS_VSTRUCT) {
 495                    ret = vmstate_save_state_v(f, inner_field->vmsd,
 496                                               curr_elem, vmdesc_loop,
 497                                               inner_field->struct_version_id,
 498                                               errp);
 499                } else {
 500                    ret = inner_field->info->put(f, curr_elem, size,
 501                                                 inner_field, vmdesc_loop);
 502                }
 503
 504                written_bytes = qemu_file_transferred(f) - old_offset;
 505                vmsd_desc_field_end(vmsd, vmdesc_loop, inner_field,
 506                                    written_bytes);
 507
 508                /* If we used a fake temp field.. free it now */
 509                if (is_null) {
 510                    g_clear_pointer((gpointer *)&inner_field, g_free);
 511                }
 512
 513                if (ret) {
 514                    error_setg(errp, "Save of field %s/%s failed",
 515                                vmsd->name, field->name);
 516                    if (vmsd->post_save) {
 517                        vmsd->post_save(opaque);
 518                    }
 519                    return ret;
 520                }
 521
 522                /* Compressed arrays only care about the first element */
 523                if (vmdesc_loop && vmsd_can_compress(field)) {
 524                    vmdesc_loop = NULL;
 525                }
 526            }
 527        } else {
 528            if (field->flags & VMS_MUST_EXIST) {
 529                error_report("Output state validation failed: %s/%s",
 530                        vmsd->name, field->name);
 531                assert(!(field->flags & VMS_MUST_EXIST));
 532            }
 533        }
 534        field++;
 535    }
 536    assert(field->flags == VMS_END);
 537
 538    if (vmdesc) {
 539        json_writer_end_array(vmdesc);
 540    }
 541
 542    ret = vmstate_subsection_save(f, vmsd, opaque, vmdesc, errp);
 543
 544    if (vmsd->post_save) {
 545        int ps_ret = vmsd->post_save(opaque);
 546        if (!ret && ps_ret) {
 547            ret = ps_ret;
 548            error_setg(errp, "post-save failed: %s", vmsd->name);
 549        }
 550    }
 551    return ret;
 552}
 553
 554static const VMStateDescription *
 555vmstate_get_subsection(const VMStateDescription * const *sub,
 556                       const char *idstr)
 557{
 558    if (sub) {
 559        for (const VMStateDescription *s = *sub; s ; s = *++sub) {
 560            if (strcmp(idstr, s->name) == 0) {
 561                return s;
 562            }
 563        }
 564    }
 565    return NULL;
 566}
 567
 568static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
 569                                   void *opaque)
 570{
 571    trace_vmstate_subsection_load(vmsd->name);
 572
 573    while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) {
 574        char idstr[256], *idstr_ret;
 575        int ret;
 576        uint8_t version_id, len, size;
 577        const VMStateDescription *sub_vmsd;
 578
 579        len = qemu_peek_byte(f, 1);
 580        if (len < strlen(vmsd->name) + 1) {
 581            /* subsection name has to be "section_name/a" */
 582            trace_vmstate_subsection_load_bad(vmsd->name, "(short)", "");
 583            return 0;
 584        }
 585        size = qemu_peek_buffer(f, (uint8_t **)&idstr_ret, len, 2);
 586        if (size != len) {
 587            trace_vmstate_subsection_load_bad(vmsd->name, "(peek fail)", "");
 588            return 0;
 589        }
 590        memcpy(idstr, idstr_ret, size);
 591        idstr[size] = 0;
 592
 593        if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) {
 594            trace_vmstate_subsection_load_bad(vmsd->name, idstr, "(prefix)");
 595            /* it doesn't have a valid subsection name */
 596            return 0;
 597        }
 598        sub_vmsd = vmstate_get_subsection(vmsd->subsections, idstr);
 599        if (sub_vmsd == NULL) {
 600            trace_vmstate_subsection_load_bad(vmsd->name, idstr, "(lookup)");
 601            return -ENOENT;
 602        }
 603        qemu_file_skip(f, 1); /* subsection */
 604        qemu_file_skip(f, 1); /* len */
 605        qemu_file_skip(f, len); /* idstr */
 606        version_id = qemu_get_be32(f);
 607
 608        ret = vmstate_load_state(f, sub_vmsd, opaque, version_id);
 609        if (ret) {
 610            trace_vmstate_subsection_load_bad(vmsd->name, idstr, "(child)");
 611            return ret;
 612        }
 613    }
 614
 615    trace_vmstate_subsection_load_good(vmsd->name);
 616    return 0;
 617}
 618
 619static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
 620                                   void *opaque, JSONWriter *vmdesc,
 621                                   Error **errp)
 622{
 623    const VMStateDescription * const *sub = vmsd->subsections;
 624    bool vmdesc_has_subsections = false;
 625    int ret = 0;
 626
 627    trace_vmstate_subsection_save_top(vmsd->name);
 628    while (sub && *sub) {
 629        if (vmstate_section_needed(*sub, opaque)) {
 630            const VMStateDescription *vmsdsub = *sub;
 631            uint8_t len;
 632
 633            trace_vmstate_subsection_save_loop(vmsd->name, vmsdsub->name);
 634            if (vmdesc) {
 635                /* Only create subsection array when we have any */
 636                if (!vmdesc_has_subsections) {
 637                    json_writer_start_array(vmdesc, "subsections");
 638                    vmdesc_has_subsections = true;
 639                }
 640
 641                json_writer_start_object(vmdesc, NULL);
 642            }
 643
 644            qemu_put_byte(f, QEMU_VM_SUBSECTION);
 645            len = strlen(vmsdsub->name);
 646            qemu_put_byte(f, len);
 647            qemu_put_buffer(f, (uint8_t *)vmsdsub->name, len);
 648            qemu_put_be32(f, vmsdsub->version_id);
 649            ret = vmstate_save_state_with_err(f, vmsdsub, opaque, vmdesc, errp);
 650            if (ret) {
 651                return ret;
 652            }
 653
 654            if (vmdesc) {
 655                json_writer_end_object(vmdesc);
 656            }
 657        }
 658        sub++;
 659    }
 660
 661    if (vmdesc_has_subsections) {
 662        json_writer_end_array(vmdesc);
 663    }
 664
 665    return ret;
 666}
 667