qemu/hw/vfio/migration.c
<<
>>
Prefs
   1/*
   2 * Migration support for VFIO devices
   3 *
   4 * Copyright NVIDIA, Inc. 2020
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2. See
   7 * the COPYING file in the top-level directory.
   8 */
   9
  10#include "qemu/osdep.h"
  11#include "qemu/main-loop.h"
  12#include "qemu/cutils.h"
  13#include <linux/vfio.h>
  14#include <sys/ioctl.h>
  15
  16#include "sysemu/runstate.h"
  17#include "hw/vfio/vfio-common.h"
  18#include "migration/migration.h"
  19#include "migration/vmstate.h"
  20#include "migration/qemu-file.h"
  21#include "migration/register.h"
  22#include "migration/blocker.h"
  23#include "migration/misc.h"
  24#include "qapi/error.h"
  25#include "exec/ramlist.h"
  26#include "exec/ram_addr.h"
  27#include "pci.h"
  28#include "trace.h"
  29#include "hw/hw.h"
  30
  31/*
  32 * Flags to be used as unique delimiters for VFIO devices in the migration
  33 * stream. These flags are composed as:
  34 * 0xffffffff => MSB 32-bit all 1s
  35 * 0xef10     => Magic ID, represents emulated (virtual) function IO
  36 * 0x0000     => 16-bits reserved for flags
  37 *
  38 * The beginning of state information is marked by _DEV_CONFIG_STATE,
  39 * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
  40 * certain state information is marked by _END_OF_STATE.
  41 */
  42#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
  43#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
  44#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
  45#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
  46
  47static int64_t bytes_transferred;
  48
  49static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
  50                                  off_t off, bool iswrite)
  51{
  52    int ret;
  53
  54    ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
  55                    pread(vbasedev->fd, val, count, off);
  56    if (ret < count) {
  57        error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
  58                     HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
  59                     vbasedev->name, off, strerror(errno));
  60        return (ret < 0) ? ret : -EINVAL;
  61    }
  62    return 0;
  63}
  64
  65static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
  66                       off_t off, bool iswrite)
  67{
  68    int ret, done = 0;
  69    __u8 *tbuf = buf;
  70
  71    while (count) {
  72        int bytes = 0;
  73
  74        if (count >= 8 && !(off % 8)) {
  75            bytes = 8;
  76        } else if (count >= 4 && !(off % 4)) {
  77            bytes = 4;
  78        } else if (count >= 2 && !(off % 2)) {
  79            bytes = 2;
  80        } else {
  81            bytes = 1;
  82        }
  83
  84        ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
  85        if (ret) {
  86            return ret;
  87        }
  88
  89        count -= bytes;
  90        done += bytes;
  91        off += bytes;
  92        tbuf += bytes;
  93    }
  94    return done;
  95}
  96
  97#define vfio_mig_read(f, v, c, o)       vfio_mig_rw(f, (__u8 *)v, c, o, false)
  98#define vfio_mig_write(f, v, c, o)      vfio_mig_rw(f, (__u8 *)v, c, o, true)
  99
 100#define VFIO_MIG_STRUCT_OFFSET(f)       \
 101                                 offsetof(struct vfio_device_migration_info, f)
 102/*
 103 * Change the device_state register for device @vbasedev. Bits set in @mask
 104 * are preserved, bits set in @value are set, and bits not set in either @mask
 105 * or @value are cleared in device_state. If the register cannot be accessed,
 106 * the resulting state would be invalid, or the device enters an error state,
 107 * an error is returned.
 108 */
 109
 110static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
 111                                    uint32_t value)
 112{
 113    VFIOMigration *migration = vbasedev->migration;
 114    VFIORegion *region = &migration->region;
 115    off_t dev_state_off = region->fd_offset +
 116                          VFIO_MIG_STRUCT_OFFSET(device_state);
 117    uint32_t device_state;
 118    int ret;
 119
 120    ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
 121                        dev_state_off);
 122    if (ret < 0) {
 123        return ret;
 124    }
 125
 126    device_state = (device_state & mask) | value;
 127
 128    if (!VFIO_DEVICE_STATE_VALID(device_state)) {
 129        return -EINVAL;
 130    }
 131
 132    ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
 133                         dev_state_off);
 134    if (ret < 0) {
 135        int rret;
 136
 137        rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
 138                             dev_state_off);
 139
 140        if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
 141            hw_error("%s: Device in error state 0x%x", vbasedev->name,
 142                     device_state);
 143            return rret ? rret : -EIO;
 144        }
 145        return ret;
 146    }
 147
 148    migration->device_state = device_state;
 149    trace_vfio_migration_set_state(vbasedev->name, device_state);
 150    return 0;
 151}
 152
 153static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
 154                                   uint64_t data_size, uint64_t *size)
 155{
 156    void *ptr = NULL;
 157    uint64_t limit = 0;
 158    int i;
 159
 160    if (!region->mmaps) {
 161        if (size) {
 162            *size = MIN(data_size, region->size - data_offset);
 163        }
 164        return ptr;
 165    }
 166
 167    for (i = 0; i < region->nr_mmaps; i++) {
 168        VFIOMmap *map = region->mmaps + i;
 169
 170        if ((data_offset >= map->offset) &&
 171            (data_offset < map->offset + map->size)) {
 172
 173            /* check if data_offset is within sparse mmap areas */
 174            ptr = map->mmap + data_offset - map->offset;
 175            if (size) {
 176                *size = MIN(data_size, map->offset + map->size - data_offset);
 177            }
 178            break;
 179        } else if ((data_offset < map->offset) &&
 180                   (!limit || limit > map->offset)) {
 181            /*
 182             * data_offset is not within sparse mmap areas, find size of
 183             * non-mapped area. Check through all list since region->mmaps list
 184             * is not sorted.
 185             */
 186            limit = map->offset;
 187        }
 188    }
 189
 190    if (!ptr && size) {
 191        *size = limit ? MIN(data_size, limit - data_offset) : data_size;
 192    }
 193    return ptr;
 194}
 195
 196static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
 197{
 198    VFIOMigration *migration = vbasedev->migration;
 199    VFIORegion *region = &migration->region;
 200    uint64_t data_offset = 0, data_size = 0, sz;
 201    int ret;
 202
 203    ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
 204                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
 205    if (ret < 0) {
 206        return ret;
 207    }
 208
 209    ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
 210                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
 211    if (ret < 0) {
 212        return ret;
 213    }
 214
 215    trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
 216                           migration->pending_bytes);
 217
 218    qemu_put_be64(f, data_size);
 219    sz = data_size;
 220
 221    while (sz) {
 222        void *buf;
 223        uint64_t sec_size;
 224        bool buf_allocated = false;
 225
 226        buf = get_data_section_size(region, data_offset, sz, &sec_size);
 227
 228        if (!buf) {
 229            buf = g_try_malloc(sec_size);
 230            if (!buf) {
 231                error_report("%s: Error allocating buffer ", __func__);
 232                return -ENOMEM;
 233            }
 234            buf_allocated = true;
 235
 236            ret = vfio_mig_read(vbasedev, buf, sec_size,
 237                                region->fd_offset + data_offset);
 238            if (ret < 0) {
 239                g_free(buf);
 240                return ret;
 241            }
 242        }
 243
 244        qemu_put_buffer(f, buf, sec_size);
 245
 246        if (buf_allocated) {
 247            g_free(buf);
 248        }
 249        sz -= sec_size;
 250        data_offset += sec_size;
 251    }
 252
 253    ret = qemu_file_get_error(f);
 254
 255    if (!ret && size) {
 256        *size = data_size;
 257    }
 258
 259    bytes_transferred += data_size;
 260    return ret;
 261}
 262
 263static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
 264                            uint64_t data_size)
 265{
 266    VFIORegion *region = &vbasedev->migration->region;
 267    uint64_t data_offset = 0, size, report_size;
 268    int ret;
 269
 270    do {
 271        ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
 272                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
 273        if (ret < 0) {
 274            return ret;
 275        }
 276
 277        if (data_offset + data_size > region->size) {
 278            /*
 279             * If data_size is greater than the data section of migration region
 280             * then iterate the write buffer operation. This case can occur if
 281             * size of migration region at destination is smaller than size of
 282             * migration region at source.
 283             */
 284            report_size = size = region->size - data_offset;
 285            data_size -= size;
 286        } else {
 287            report_size = size = data_size;
 288            data_size = 0;
 289        }
 290
 291        trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
 292
 293        while (size) {
 294            void *buf;
 295            uint64_t sec_size;
 296            bool buf_alloc = false;
 297
 298            buf = get_data_section_size(region, data_offset, size, &sec_size);
 299
 300            if (!buf) {
 301                buf = g_try_malloc(sec_size);
 302                if (!buf) {
 303                    error_report("%s: Error allocating buffer ", __func__);
 304                    return -ENOMEM;
 305                }
 306                buf_alloc = true;
 307            }
 308
 309            qemu_get_buffer(f, buf, sec_size);
 310
 311            if (buf_alloc) {
 312                ret = vfio_mig_write(vbasedev, buf, sec_size,
 313                        region->fd_offset + data_offset);
 314                g_free(buf);
 315
 316                if (ret < 0) {
 317                    return ret;
 318                }
 319            }
 320            size -= sec_size;
 321            data_offset += sec_size;
 322        }
 323
 324        ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
 325                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
 326        if (ret < 0) {
 327            return ret;
 328        }
 329    } while (data_size);
 330
 331    return 0;
 332}
 333
 334static int vfio_update_pending(VFIODevice *vbasedev)
 335{
 336    VFIOMigration *migration = vbasedev->migration;
 337    VFIORegion *region = &migration->region;
 338    uint64_t pending_bytes = 0;
 339    int ret;
 340
 341    ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
 342                    region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
 343    if (ret < 0) {
 344        migration->pending_bytes = 0;
 345        return ret;
 346    }
 347
 348    migration->pending_bytes = pending_bytes;
 349    trace_vfio_update_pending(vbasedev->name, pending_bytes);
 350    return 0;
 351}
 352
 353static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
 354{
 355    VFIODevice *vbasedev = opaque;
 356
 357    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
 358
 359    if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
 360        vbasedev->ops->vfio_save_config(vbasedev, f);
 361    }
 362
 363    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 364
 365    trace_vfio_save_device_config_state(vbasedev->name);
 366
 367    return qemu_file_get_error(f);
 368}
 369
 370static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
 371{
 372    VFIODevice *vbasedev = opaque;
 373    uint64_t data;
 374
 375    if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
 376        int ret;
 377
 378        ret = vbasedev->ops->vfio_load_config(vbasedev, f);
 379        if (ret) {
 380            error_report("%s: Failed to load device config space",
 381                         vbasedev->name);
 382            return ret;
 383        }
 384    }
 385
 386    data = qemu_get_be64(f);
 387    if (data != VFIO_MIG_FLAG_END_OF_STATE) {
 388        error_report("%s: Failed loading device config space, "
 389                     "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
 390        return -EINVAL;
 391    }
 392
 393    trace_vfio_load_device_config_state(vbasedev->name);
 394    return qemu_file_get_error(f);
 395}
 396
 397static void vfio_migration_cleanup(VFIODevice *vbasedev)
 398{
 399    VFIOMigration *migration = vbasedev->migration;
 400
 401    if (migration->region.mmaps) {
 402        vfio_region_unmap(&migration->region);
 403    }
 404}
 405
 406/* ---------------------------------------------------------------------- */
 407
 408static int vfio_save_setup(QEMUFile *f, void *opaque)
 409{
 410    VFIODevice *vbasedev = opaque;
 411    VFIOMigration *migration = vbasedev->migration;
 412    int ret;
 413
 414    trace_vfio_save_setup(vbasedev->name);
 415
 416    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
 417
 418    if (migration->region.mmaps) {
 419        /*
 420         * Calling vfio_region_mmap() from migration thread. Memory API called
 421         * from this function require locking the iothread when called from
 422         * outside the main loop thread.
 423         */
 424        qemu_mutex_lock_iothread();
 425        ret = vfio_region_mmap(&migration->region);
 426        qemu_mutex_unlock_iothread();
 427        if (ret) {
 428            error_report("%s: Failed to mmap VFIO migration region: %s",
 429                         vbasedev->name, strerror(-ret));
 430            error_report("%s: Falling back to slow path", vbasedev->name);
 431        }
 432    }
 433
 434    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
 435                                   VFIO_DEVICE_STATE_SAVING);
 436    if (ret) {
 437        error_report("%s: Failed to set state SAVING", vbasedev->name);
 438        return ret;
 439    }
 440
 441    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 442
 443    ret = qemu_file_get_error(f);
 444    if (ret) {
 445        return ret;
 446    }
 447
 448    return 0;
 449}
 450
 451static void vfio_save_cleanup(void *opaque)
 452{
 453    VFIODevice *vbasedev = opaque;
 454
 455    vfio_migration_cleanup(vbasedev);
 456    trace_vfio_save_cleanup(vbasedev->name);
 457}
 458
 459static void vfio_save_pending(QEMUFile *f, void *opaque,
 460                              uint64_t threshold_size,
 461                              uint64_t *res_precopy_only,
 462                              uint64_t *res_compatible,
 463                              uint64_t *res_postcopy_only)
 464{
 465    VFIODevice *vbasedev = opaque;
 466    VFIOMigration *migration = vbasedev->migration;
 467    int ret;
 468
 469    ret = vfio_update_pending(vbasedev);
 470    if (ret) {
 471        return;
 472    }
 473
 474    *res_precopy_only += migration->pending_bytes;
 475
 476    trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
 477                            *res_postcopy_only, *res_compatible);
 478}
 479
 480static int vfio_save_iterate(QEMUFile *f, void *opaque)
 481{
 482    VFIODevice *vbasedev = opaque;
 483    VFIOMigration *migration = vbasedev->migration;
 484    uint64_t data_size;
 485    int ret;
 486
 487    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
 488
 489    if (migration->pending_bytes == 0) {
 490        ret = vfio_update_pending(vbasedev);
 491        if (ret) {
 492            return ret;
 493        }
 494
 495        if (migration->pending_bytes == 0) {
 496            qemu_put_be64(f, 0);
 497            qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 498            /* indicates data finished, goto complete phase */
 499            return 1;
 500        }
 501    }
 502
 503    ret = vfio_save_buffer(f, vbasedev, &data_size);
 504    if (ret) {
 505        error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
 506                     strerror(errno));
 507        return ret;
 508    }
 509
 510    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 511
 512    ret = qemu_file_get_error(f);
 513    if (ret) {
 514        return ret;
 515    }
 516
 517    /*
 518     * Reset pending_bytes as .save_live_pending is not called during savevm or
 519     * snapshot case, in such case vfio_update_pending() at the start of this
 520     * function updates pending_bytes.
 521     */
 522    migration->pending_bytes = 0;
 523    trace_vfio_save_iterate(vbasedev->name, data_size);
 524    return 0;
 525}
 526
 527static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
 528{
 529    VFIODevice *vbasedev = opaque;
 530    VFIOMigration *migration = vbasedev->migration;
 531    uint64_t data_size;
 532    int ret;
 533
 534    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING,
 535                                   VFIO_DEVICE_STATE_SAVING);
 536    if (ret) {
 537        error_report("%s: Failed to set state STOP and SAVING",
 538                     vbasedev->name);
 539        return ret;
 540    }
 541
 542    ret = vfio_update_pending(vbasedev);
 543    if (ret) {
 544        return ret;
 545    }
 546
 547    while (migration->pending_bytes > 0) {
 548        qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
 549        ret = vfio_save_buffer(f, vbasedev, &data_size);
 550        if (ret < 0) {
 551            error_report("%s: Failed to save buffer", vbasedev->name);
 552            return ret;
 553        }
 554
 555        if (data_size == 0) {
 556            break;
 557        }
 558
 559        ret = vfio_update_pending(vbasedev);
 560        if (ret) {
 561            return ret;
 562        }
 563    }
 564
 565    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 566
 567    ret = qemu_file_get_error(f);
 568    if (ret) {
 569        return ret;
 570    }
 571
 572    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0);
 573    if (ret) {
 574        error_report("%s: Failed to set state STOPPED", vbasedev->name);
 575        return ret;
 576    }
 577
 578    trace_vfio_save_complete_precopy(vbasedev->name);
 579    return ret;
 580}
 581
 582static void vfio_save_state(QEMUFile *f, void *opaque)
 583{
 584    VFIODevice *vbasedev = opaque;
 585    int ret;
 586
 587    ret = vfio_save_device_config_state(f, opaque);
 588    if (ret) {
 589        error_report("%s: Failed to save device config space",
 590                     vbasedev->name);
 591        qemu_file_set_error(f, ret);
 592    }
 593}
 594
 595static int vfio_load_setup(QEMUFile *f, void *opaque)
 596{
 597    VFIODevice *vbasedev = opaque;
 598    VFIOMigration *migration = vbasedev->migration;
 599    int ret = 0;
 600
 601    if (migration->region.mmaps) {
 602        ret = vfio_region_mmap(&migration->region);
 603        if (ret) {
 604            error_report("%s: Failed to mmap VFIO migration region %d: %s",
 605                         vbasedev->name, migration->region.nr,
 606                         strerror(-ret));
 607            error_report("%s: Falling back to slow path", vbasedev->name);
 608        }
 609    }
 610
 611    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
 612                                   VFIO_DEVICE_STATE_RESUMING);
 613    if (ret) {
 614        error_report("%s: Failed to set state RESUMING", vbasedev->name);
 615        if (migration->region.mmaps) {
 616            vfio_region_unmap(&migration->region);
 617        }
 618    }
 619    return ret;
 620}
 621
 622static int vfio_load_cleanup(void *opaque)
 623{
 624    VFIODevice *vbasedev = opaque;
 625
 626    vfio_migration_cleanup(vbasedev);
 627    trace_vfio_load_cleanup(vbasedev->name);
 628    return 0;
 629}
 630
 631static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
 632{
 633    VFIODevice *vbasedev = opaque;
 634    int ret = 0;
 635    uint64_t data;
 636
 637    data = qemu_get_be64(f);
 638    while (data != VFIO_MIG_FLAG_END_OF_STATE) {
 639
 640        trace_vfio_load_state(vbasedev->name, data);
 641
 642        switch (data) {
 643        case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
 644        {
 645            return vfio_load_device_config_state(f, opaque);
 646        }
 647        case VFIO_MIG_FLAG_DEV_SETUP_STATE:
 648        {
 649            data = qemu_get_be64(f);
 650            if (data == VFIO_MIG_FLAG_END_OF_STATE) {
 651                return ret;
 652            } else {
 653                error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
 654                             vbasedev->name, data);
 655                return -EINVAL;
 656            }
 657            break;
 658        }
 659        case VFIO_MIG_FLAG_DEV_DATA_STATE:
 660        {
 661            uint64_t data_size = qemu_get_be64(f);
 662
 663            if (data_size) {
 664                ret = vfio_load_buffer(f, vbasedev, data_size);
 665                if (ret < 0) {
 666                    return ret;
 667                }
 668            }
 669            break;
 670        }
 671        default:
 672            error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
 673            return -EINVAL;
 674        }
 675
 676        data = qemu_get_be64(f);
 677        ret = qemu_file_get_error(f);
 678        if (ret) {
 679            return ret;
 680        }
 681    }
 682    return ret;
 683}
 684
 685static SaveVMHandlers savevm_vfio_handlers = {
 686    .save_setup = vfio_save_setup,
 687    .save_cleanup = vfio_save_cleanup,
 688    .save_live_pending = vfio_save_pending,
 689    .save_live_iterate = vfio_save_iterate,
 690    .save_live_complete_precopy = vfio_save_complete_precopy,
 691    .save_state = vfio_save_state,
 692    .load_setup = vfio_load_setup,
 693    .load_cleanup = vfio_load_cleanup,
 694    .load_state = vfio_load_state,
 695};
 696
 697/* ---------------------------------------------------------------------- */
 698
 699static void vfio_vmstate_change(void *opaque, bool running, RunState state)
 700{
 701    VFIODevice *vbasedev = opaque;
 702    VFIOMigration *migration = vbasedev->migration;
 703    uint32_t value, mask;
 704    int ret;
 705
 706    if (vbasedev->migration->vm_running == running) {
 707        return;
 708    }
 709
 710    if (running) {
 711        /*
 712         * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
 713         * Transition from _SAVING to _RUNNING can happen if there is migration
 714         * failure, in that case clear _SAVING bit.
 715         * Transition from _RESUMING to _RUNNING occurs during resuming
 716         * phase, in that case clear _RESUMING bit.
 717         * In both the above cases, set _RUNNING bit.
 718         */
 719        mask = ~VFIO_DEVICE_STATE_MASK;
 720        value = VFIO_DEVICE_STATE_RUNNING;
 721    } else {
 722        /*
 723         * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
 724         * _RUNNING bit
 725         */
 726        mask = ~VFIO_DEVICE_STATE_RUNNING;
 727
 728        /*
 729         * When VM state transition to stop for savevm command, device should
 730         * start saving data.
 731         */
 732        if (state == RUN_STATE_SAVE_VM) {
 733            value = VFIO_DEVICE_STATE_SAVING;
 734        } else {
 735            value = 0;
 736        }
 737    }
 738
 739    ret = vfio_migration_set_state(vbasedev, mask, value);
 740    if (ret) {
 741        /*
 742         * Migration should be aborted in this case, but vm_state_notify()
 743         * currently does not support reporting failures.
 744         */
 745        error_report("%s: Failed to set device state 0x%x", vbasedev->name,
 746                     (migration->device_state & mask) | value);
 747        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
 748    }
 749    vbasedev->migration->vm_running = running;
 750    trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
 751            (migration->device_state & mask) | value);
 752}
 753
 754static void vfio_migration_state_notifier(Notifier *notifier, void *data)
 755{
 756    MigrationState *s = data;
 757    VFIOMigration *migration = container_of(notifier, VFIOMigration,
 758                                            migration_state);
 759    VFIODevice *vbasedev = migration->vbasedev;
 760    int ret;
 761
 762    trace_vfio_migration_state_notifier(vbasedev->name,
 763                                        MigrationStatus_str(s->state));
 764
 765    switch (s->state) {
 766    case MIGRATION_STATUS_CANCELLING:
 767    case MIGRATION_STATUS_CANCELLED:
 768    case MIGRATION_STATUS_FAILED:
 769        bytes_transferred = 0;
 770        ret = vfio_migration_set_state(vbasedev,
 771                      ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
 772                      VFIO_DEVICE_STATE_RUNNING);
 773        if (ret) {
 774            error_report("%s: Failed to set state RUNNING", vbasedev->name);
 775        }
 776    }
 777}
 778
 779static void vfio_migration_exit(VFIODevice *vbasedev)
 780{
 781    VFIOMigration *migration = vbasedev->migration;
 782
 783    vfio_region_exit(&migration->region);
 784    vfio_region_finalize(&migration->region);
 785    g_free(vbasedev->migration);
 786    vbasedev->migration = NULL;
 787}
 788
 789static int vfio_migration_init(VFIODevice *vbasedev,
 790                               struct vfio_region_info *info)
 791{
 792    int ret;
 793    Object *obj;
 794    VFIOMigration *migration;
 795    char id[256] = "";
 796    g_autofree char *path = NULL, *oid = NULL;
 797
 798    if (!vbasedev->ops->vfio_get_object) {
 799        return -EINVAL;
 800    }
 801
 802    obj = vbasedev->ops->vfio_get_object(vbasedev);
 803    if (!obj) {
 804        return -EINVAL;
 805    }
 806
 807    vbasedev->migration = g_new0(VFIOMigration, 1);
 808
 809    ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
 810                            info->index, "migration");
 811    if (ret) {
 812        error_report("%s: Failed to setup VFIO migration region %d: %s",
 813                     vbasedev->name, info->index, strerror(-ret));
 814        goto err;
 815    }
 816
 817    if (!vbasedev->migration->region.size) {
 818        error_report("%s: Invalid zero-sized VFIO migration region %d",
 819                     vbasedev->name, info->index);
 820        ret = -EINVAL;
 821        goto err;
 822    }
 823
 824    migration = vbasedev->migration;
 825    migration->vbasedev = vbasedev;
 826
 827    oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
 828    if (oid) {
 829        path = g_strdup_printf("%s/vfio", oid);
 830    } else {
 831        path = g_strdup("vfio");
 832    }
 833    strpadcpy(id, sizeof(id), path, '\0');
 834
 835    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
 836                         vbasedev);
 837
 838    migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
 839                                                           vfio_vmstate_change,
 840                                                           vbasedev);
 841    migration->migration_state.notify = vfio_migration_state_notifier;
 842    add_migration_state_change_notifier(&migration->migration_state);
 843    return 0;
 844
 845err:
 846    vfio_migration_exit(vbasedev);
 847    return ret;
 848}
 849
 850/* ---------------------------------------------------------------------- */
 851
 852int64_t vfio_mig_bytes_transferred(void)
 853{
 854    return bytes_transferred;
 855}
 856
 857int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 858{
 859    VFIOContainer *container = vbasedev->group->container;
 860    struct vfio_region_info *info = NULL;
 861    int ret = -ENOTSUP;
 862
 863    if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
 864        goto add_blocker;
 865    }
 866
 867    ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
 868                                   VFIO_REGION_SUBTYPE_MIGRATION, &info);
 869    if (ret) {
 870        goto add_blocker;
 871    }
 872
 873    ret = vfio_migration_init(vbasedev, info);
 874    if (ret) {
 875        goto add_blocker;
 876    }
 877
 878    trace_vfio_migration_probe(vbasedev->name, info->index);
 879    g_free(info);
 880    return 0;
 881
 882add_blocker:
 883    error_setg(&vbasedev->migration_blocker,
 884               "VFIO device doesn't support migration");
 885    g_free(info);
 886
 887    ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
 888    if (ret < 0) {
 889        error_free(vbasedev->migration_blocker);
 890        vbasedev->migration_blocker = NULL;
 891    }
 892    return ret;
 893}
 894
 895void vfio_migration_finalize(VFIODevice *vbasedev)
 896{
 897    if (vbasedev->migration) {
 898        VFIOMigration *migration = vbasedev->migration;
 899
 900        remove_migration_state_change_notifier(&migration->migration_state);
 901        qemu_del_vm_change_state_handler(migration->vm_state);
 902        unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
 903        vfio_migration_exit(vbasedev);
 904    }
 905
 906    if (vbasedev->migration_blocker) {
 907        migrate_del_blocker(vbasedev->migration_blocker);
 908        error_free(vbasedev->migration_blocker);
 909        vbasedev->migration_blocker = NULL;
 910    }
 911}
 912