qemu/hw/vfio/migration.c
<<
>>
Prefs
   1/*
   2 * Migration support for VFIO devices
   3 *
   4 * Copyright NVIDIA, Inc. 2020
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2. See
   7 * the COPYING file in the top-level directory.
   8 */
   9
  10#include "qemu/osdep.h"
  11#include "qemu/main-loop.h"
  12#include "qemu/cutils.h"
  13#include <linux/vfio.h>
  14#include <sys/ioctl.h>
  15
  16#include "sysemu/runstate.h"
  17#include "hw/vfio/vfio-common.h"
  18#include "cpu.h"
  19#include "migration/migration.h"
  20#include "migration/vmstate.h"
  21#include "migration/qemu-file.h"
  22#include "migration/register.h"
  23#include "migration/blocker.h"
  24#include "migration/misc.h"
  25#include "qapi/error.h"
  26#include "exec/ramlist.h"
  27#include "exec/ram_addr.h"
  28#include "pci.h"
  29#include "trace.h"
  30#include "hw/hw.h"
  31
  32/*
  33 * Flags to be used as unique delimiters for VFIO devices in the migration
  34 * stream. These flags are composed as:
  35 * 0xffffffff => MSB 32-bit all 1s
  36 * 0xef10     => Magic ID, represents emulated (virtual) function IO
  37 * 0x0000     => 16-bits reserved for flags
  38 *
  39 * The beginning of state information is marked by _DEV_CONFIG_STATE,
  40 * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
  41 * certain state information is marked by _END_OF_STATE.
  42 */
  43#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
  44#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
  45#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
  46#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
  47
  48static int64_t bytes_transferred;
  49
  50static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
  51                                  off_t off, bool iswrite)
  52{
  53    int ret;
  54
  55    ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
  56                    pread(vbasedev->fd, val, count, off);
  57    if (ret < count) {
  58        error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
  59                     HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
  60                     vbasedev->name, off, strerror(errno));
  61        return (ret < 0) ? ret : -EINVAL;
  62    }
  63    return 0;
  64}
  65
  66static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
  67                       off_t off, bool iswrite)
  68{
  69    int ret, done = 0;
  70    __u8 *tbuf = buf;
  71
  72    while (count) {
  73        int bytes = 0;
  74
  75        if (count >= 8 && !(off % 8)) {
  76            bytes = 8;
  77        } else if (count >= 4 && !(off % 4)) {
  78            bytes = 4;
  79        } else if (count >= 2 && !(off % 2)) {
  80            bytes = 2;
  81        } else {
  82            bytes = 1;
  83        }
  84
  85        ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
  86        if (ret) {
  87            return ret;
  88        }
  89
  90        count -= bytes;
  91        done += bytes;
  92        off += bytes;
  93        tbuf += bytes;
  94    }
  95    return done;
  96}
  97
  98#define vfio_mig_read(f, v, c, o)       vfio_mig_rw(f, (__u8 *)v, c, o, false)
  99#define vfio_mig_write(f, v, c, o)      vfio_mig_rw(f, (__u8 *)v, c, o, true)
 100
 101#define VFIO_MIG_STRUCT_OFFSET(f)       \
 102                                 offsetof(struct vfio_device_migration_info, f)
 103/*
 104 * Change the device_state register for device @vbasedev. Bits set in @mask
 105 * are preserved, bits set in @value are set, and bits not set in either @mask
 106 * or @value are cleared in device_state. If the register cannot be accessed,
 107 * the resulting state would be invalid, or the device enters an error state,
 108 * an error is returned.
 109 */
 110
 111static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
 112                                    uint32_t value)
 113{
 114    VFIOMigration *migration = vbasedev->migration;
 115    VFIORegion *region = &migration->region;
 116    off_t dev_state_off = region->fd_offset +
 117                          VFIO_MIG_STRUCT_OFFSET(device_state);
 118    uint32_t device_state;
 119    int ret;
 120
 121    ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
 122                        dev_state_off);
 123    if (ret < 0) {
 124        return ret;
 125    }
 126
 127    device_state = (device_state & mask) | value;
 128
 129    if (!VFIO_DEVICE_STATE_VALID(device_state)) {
 130        return -EINVAL;
 131    }
 132
 133    ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
 134                         dev_state_off);
 135    if (ret < 0) {
 136        int rret;
 137
 138        rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
 139                             dev_state_off);
 140
 141        if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
 142            hw_error("%s: Device in error state 0x%x", vbasedev->name,
 143                     device_state);
 144            return rret ? rret : -EIO;
 145        }
 146        return ret;
 147    }
 148
 149    migration->device_state = device_state;
 150    trace_vfio_migration_set_state(vbasedev->name, device_state);
 151    return 0;
 152}
 153
 154static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
 155                                   uint64_t data_size, uint64_t *size)
 156{
 157    void *ptr = NULL;
 158    uint64_t limit = 0;
 159    int i;
 160
 161    if (!region->mmaps) {
 162        if (size) {
 163            *size = MIN(data_size, region->size - data_offset);
 164        }
 165        return ptr;
 166    }
 167
 168    for (i = 0; i < region->nr_mmaps; i++) {
 169        VFIOMmap *map = region->mmaps + i;
 170
 171        if ((data_offset >= map->offset) &&
 172            (data_offset < map->offset + map->size)) {
 173
 174            /* check if data_offset is within sparse mmap areas */
 175            ptr = map->mmap + data_offset - map->offset;
 176            if (size) {
 177                *size = MIN(data_size, map->offset + map->size - data_offset);
 178            }
 179            break;
 180        } else if ((data_offset < map->offset) &&
 181                   (!limit || limit > map->offset)) {
 182            /*
 183             * data_offset is not within sparse mmap areas, find size of
 184             * non-mapped area. Check through all list since region->mmaps list
 185             * is not sorted.
 186             */
 187            limit = map->offset;
 188        }
 189    }
 190
 191    if (!ptr && size) {
 192        *size = limit ? MIN(data_size, limit - data_offset) : data_size;
 193    }
 194    return ptr;
 195}
 196
 197static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
 198{
 199    VFIOMigration *migration = vbasedev->migration;
 200    VFIORegion *region = &migration->region;
 201    uint64_t data_offset = 0, data_size = 0, sz;
 202    int ret;
 203
 204    ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
 205                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
 206    if (ret < 0) {
 207        return ret;
 208    }
 209
 210    ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
 211                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
 212    if (ret < 0) {
 213        return ret;
 214    }
 215
 216    trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
 217                           migration->pending_bytes);
 218
 219    qemu_put_be64(f, data_size);
 220    sz = data_size;
 221
 222    while (sz) {
 223        void *buf;
 224        uint64_t sec_size;
 225        bool buf_allocated = false;
 226
 227        buf = get_data_section_size(region, data_offset, sz, &sec_size);
 228
 229        if (!buf) {
 230            buf = g_try_malloc(sec_size);
 231            if (!buf) {
 232                error_report("%s: Error allocating buffer ", __func__);
 233                return -ENOMEM;
 234            }
 235            buf_allocated = true;
 236
 237            ret = vfio_mig_read(vbasedev, buf, sec_size,
 238                                region->fd_offset + data_offset);
 239            if (ret < 0) {
 240                g_free(buf);
 241                return ret;
 242            }
 243        }
 244
 245        qemu_put_buffer(f, buf, sec_size);
 246
 247        if (buf_allocated) {
 248            g_free(buf);
 249        }
 250        sz -= sec_size;
 251        data_offset += sec_size;
 252    }
 253
 254    ret = qemu_file_get_error(f);
 255
 256    if (!ret && size) {
 257        *size = data_size;
 258    }
 259
 260    bytes_transferred += data_size;
 261    return ret;
 262}
 263
 264static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
 265                            uint64_t data_size)
 266{
 267    VFIORegion *region = &vbasedev->migration->region;
 268    uint64_t data_offset = 0, size, report_size;
 269    int ret;
 270
 271    do {
 272        ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
 273                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
 274        if (ret < 0) {
 275            return ret;
 276        }
 277
 278        if (data_offset + data_size > region->size) {
 279            /*
 280             * If data_size is greater than the data section of migration region
 281             * then iterate the write buffer operation. This case can occur if
 282             * size of migration region at destination is smaller than size of
 283             * migration region at source.
 284             */
 285            report_size = size = region->size - data_offset;
 286            data_size -= size;
 287        } else {
 288            report_size = size = data_size;
 289            data_size = 0;
 290        }
 291
 292        trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
 293
 294        while (size) {
 295            void *buf;
 296            uint64_t sec_size;
 297            bool buf_alloc = false;
 298
 299            buf = get_data_section_size(region, data_offset, size, &sec_size);
 300
 301            if (!buf) {
 302                buf = g_try_malloc(sec_size);
 303                if (!buf) {
 304                    error_report("%s: Error allocating buffer ", __func__);
 305                    return -ENOMEM;
 306                }
 307                buf_alloc = true;
 308            }
 309
 310            qemu_get_buffer(f, buf, sec_size);
 311
 312            if (buf_alloc) {
 313                ret = vfio_mig_write(vbasedev, buf, sec_size,
 314                        region->fd_offset + data_offset);
 315                g_free(buf);
 316
 317                if (ret < 0) {
 318                    return ret;
 319                }
 320            }
 321            size -= sec_size;
 322            data_offset += sec_size;
 323        }
 324
 325        ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
 326                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
 327        if (ret < 0) {
 328            return ret;
 329        }
 330    } while (data_size);
 331
 332    return 0;
 333}
 334
 335static int vfio_update_pending(VFIODevice *vbasedev)
 336{
 337    VFIOMigration *migration = vbasedev->migration;
 338    VFIORegion *region = &migration->region;
 339    uint64_t pending_bytes = 0;
 340    int ret;
 341
 342    ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
 343                    region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
 344    if (ret < 0) {
 345        migration->pending_bytes = 0;
 346        return ret;
 347    }
 348
 349    migration->pending_bytes = pending_bytes;
 350    trace_vfio_update_pending(vbasedev->name, pending_bytes);
 351    return 0;
 352}
 353
 354static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
 355{
 356    VFIODevice *vbasedev = opaque;
 357
 358    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
 359
 360    if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
 361        vbasedev->ops->vfio_save_config(vbasedev, f);
 362    }
 363
 364    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 365
 366    trace_vfio_save_device_config_state(vbasedev->name);
 367
 368    return qemu_file_get_error(f);
 369}
 370
 371static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
 372{
 373    VFIODevice *vbasedev = opaque;
 374    uint64_t data;
 375
 376    if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
 377        int ret;
 378
 379        ret = vbasedev->ops->vfio_load_config(vbasedev, f);
 380        if (ret) {
 381            error_report("%s: Failed to load device config space",
 382                         vbasedev->name);
 383            return ret;
 384        }
 385    }
 386
 387    data = qemu_get_be64(f);
 388    if (data != VFIO_MIG_FLAG_END_OF_STATE) {
 389        error_report("%s: Failed loading device config space, "
 390                     "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
 391        return -EINVAL;
 392    }
 393
 394    trace_vfio_load_device_config_state(vbasedev->name);
 395    return qemu_file_get_error(f);
 396}
 397
 398static void vfio_migration_cleanup(VFIODevice *vbasedev)
 399{
 400    VFIOMigration *migration = vbasedev->migration;
 401
 402    if (migration->region.mmaps) {
 403        vfio_region_unmap(&migration->region);
 404    }
 405}
 406
 407/* ---------------------------------------------------------------------- */
 408
 409static int vfio_save_setup(QEMUFile *f, void *opaque)
 410{
 411    VFIODevice *vbasedev = opaque;
 412    VFIOMigration *migration = vbasedev->migration;
 413    int ret;
 414
 415    trace_vfio_save_setup(vbasedev->name);
 416
 417    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
 418
 419    if (migration->region.mmaps) {
 420        /*
 421         * Calling vfio_region_mmap() from migration thread. Memory API called
 422         * from this function require locking the iothread when called from
 423         * outside the main loop thread.
 424         */
 425        qemu_mutex_lock_iothread();
 426        ret = vfio_region_mmap(&migration->region);
 427        qemu_mutex_unlock_iothread();
 428        if (ret) {
 429            error_report("%s: Failed to mmap VFIO migration region: %s",
 430                         vbasedev->name, strerror(-ret));
 431            error_report("%s: Falling back to slow path", vbasedev->name);
 432        }
 433    }
 434
 435    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
 436                                   VFIO_DEVICE_STATE_SAVING);
 437    if (ret) {
 438        error_report("%s: Failed to set state SAVING", vbasedev->name);
 439        return ret;
 440    }
 441
 442    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 443
 444    ret = qemu_file_get_error(f);
 445    if (ret) {
 446        return ret;
 447    }
 448
 449    return 0;
 450}
 451
 452static void vfio_save_cleanup(void *opaque)
 453{
 454    VFIODevice *vbasedev = opaque;
 455
 456    vfio_migration_cleanup(vbasedev);
 457    trace_vfio_save_cleanup(vbasedev->name);
 458}
 459
 460static void vfio_save_pending(QEMUFile *f, void *opaque,
 461                              uint64_t threshold_size,
 462                              uint64_t *res_precopy_only,
 463                              uint64_t *res_compatible,
 464                              uint64_t *res_postcopy_only)
 465{
 466    VFIODevice *vbasedev = opaque;
 467    VFIOMigration *migration = vbasedev->migration;
 468    int ret;
 469
 470    ret = vfio_update_pending(vbasedev);
 471    if (ret) {
 472        return;
 473    }
 474
 475    *res_precopy_only += migration->pending_bytes;
 476
 477    trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
 478                            *res_postcopy_only, *res_compatible);
 479}
 480
 481static int vfio_save_iterate(QEMUFile *f, void *opaque)
 482{
 483    VFIODevice *vbasedev = opaque;
 484    VFIOMigration *migration = vbasedev->migration;
 485    uint64_t data_size;
 486    int ret;
 487
 488    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
 489
 490    if (migration->pending_bytes == 0) {
 491        ret = vfio_update_pending(vbasedev);
 492        if (ret) {
 493            return ret;
 494        }
 495
 496        if (migration->pending_bytes == 0) {
 497            qemu_put_be64(f, 0);
 498            qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 499            /* indicates data finished, goto complete phase */
 500            return 1;
 501        }
 502    }
 503
 504    ret = vfio_save_buffer(f, vbasedev, &data_size);
 505    if (ret) {
 506        error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
 507                     strerror(errno));
 508        return ret;
 509    }
 510
 511    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 512
 513    ret = qemu_file_get_error(f);
 514    if (ret) {
 515        return ret;
 516    }
 517
 518    /*
 519     * Reset pending_bytes as .save_live_pending is not called during savevm or
 520     * snapshot case, in such case vfio_update_pending() at the start of this
 521     * function updates pending_bytes.
 522     */
 523    migration->pending_bytes = 0;
 524    trace_vfio_save_iterate(vbasedev->name, data_size);
 525    return 0;
 526}
 527
 528static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
 529{
 530    VFIODevice *vbasedev = opaque;
 531    VFIOMigration *migration = vbasedev->migration;
 532    uint64_t data_size;
 533    int ret;
 534
 535    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING,
 536                                   VFIO_DEVICE_STATE_SAVING);
 537    if (ret) {
 538        error_report("%s: Failed to set state STOP and SAVING",
 539                     vbasedev->name);
 540        return ret;
 541    }
 542
 543    ret = vfio_update_pending(vbasedev);
 544    if (ret) {
 545        return ret;
 546    }
 547
 548    while (migration->pending_bytes > 0) {
 549        qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
 550        ret = vfio_save_buffer(f, vbasedev, &data_size);
 551        if (ret < 0) {
 552            error_report("%s: Failed to save buffer", vbasedev->name);
 553            return ret;
 554        }
 555
 556        if (data_size == 0) {
 557            break;
 558        }
 559
 560        ret = vfio_update_pending(vbasedev);
 561        if (ret) {
 562            return ret;
 563        }
 564    }
 565
 566    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 567
 568    ret = qemu_file_get_error(f);
 569    if (ret) {
 570        return ret;
 571    }
 572
 573    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0);
 574    if (ret) {
 575        error_report("%s: Failed to set state STOPPED", vbasedev->name);
 576        return ret;
 577    }
 578
 579    trace_vfio_save_complete_precopy(vbasedev->name);
 580    return ret;
 581}
 582
 583static void vfio_save_state(QEMUFile *f, void *opaque)
 584{
 585    VFIODevice *vbasedev = opaque;
 586    int ret;
 587
 588    ret = vfio_save_device_config_state(f, opaque);
 589    if (ret) {
 590        error_report("%s: Failed to save device config space",
 591                     vbasedev->name);
 592        qemu_file_set_error(f, ret);
 593    }
 594}
 595
 596static int vfio_load_setup(QEMUFile *f, void *opaque)
 597{
 598    VFIODevice *vbasedev = opaque;
 599    VFIOMigration *migration = vbasedev->migration;
 600    int ret = 0;
 601
 602    if (migration->region.mmaps) {
 603        ret = vfio_region_mmap(&migration->region);
 604        if (ret) {
 605            error_report("%s: Failed to mmap VFIO migration region %d: %s",
 606                         vbasedev->name, migration->region.nr,
 607                         strerror(-ret));
 608            error_report("%s: Falling back to slow path", vbasedev->name);
 609        }
 610    }
 611
 612    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
 613                                   VFIO_DEVICE_STATE_RESUMING);
 614    if (ret) {
 615        error_report("%s: Failed to set state RESUMING", vbasedev->name);
 616        if (migration->region.mmaps) {
 617            vfio_region_unmap(&migration->region);
 618        }
 619    }
 620    return ret;
 621}
 622
 623static int vfio_load_cleanup(void *opaque)
 624{
 625    VFIODevice *vbasedev = opaque;
 626
 627    vfio_migration_cleanup(vbasedev);
 628    trace_vfio_load_cleanup(vbasedev->name);
 629    return 0;
 630}
 631
 632static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
 633{
 634    VFIODevice *vbasedev = opaque;
 635    int ret = 0;
 636    uint64_t data;
 637
 638    data = qemu_get_be64(f);
 639    while (data != VFIO_MIG_FLAG_END_OF_STATE) {
 640
 641        trace_vfio_load_state(vbasedev->name, data);
 642
 643        switch (data) {
 644        case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
 645        {
 646            return vfio_load_device_config_state(f, opaque);
 647        }
 648        case VFIO_MIG_FLAG_DEV_SETUP_STATE:
 649        {
 650            data = qemu_get_be64(f);
 651            if (data == VFIO_MIG_FLAG_END_OF_STATE) {
 652                return ret;
 653            } else {
 654                error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
 655                             vbasedev->name, data);
 656                return -EINVAL;
 657            }
 658            break;
 659        }
 660        case VFIO_MIG_FLAG_DEV_DATA_STATE:
 661        {
 662            uint64_t data_size = qemu_get_be64(f);
 663
 664            if (data_size) {
 665                ret = vfio_load_buffer(f, vbasedev, data_size);
 666                if (ret < 0) {
 667                    return ret;
 668                }
 669            }
 670            break;
 671        }
 672        default:
 673            error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
 674            return -EINVAL;
 675        }
 676
 677        data = qemu_get_be64(f);
 678        ret = qemu_file_get_error(f);
 679        if (ret) {
 680            return ret;
 681        }
 682    }
 683    return ret;
 684}
 685
 686static SaveVMHandlers savevm_vfio_handlers = {
 687    .save_setup = vfio_save_setup,
 688    .save_cleanup = vfio_save_cleanup,
 689    .save_live_pending = vfio_save_pending,
 690    .save_live_iterate = vfio_save_iterate,
 691    .save_live_complete_precopy = vfio_save_complete_precopy,
 692    .save_state = vfio_save_state,
 693    .load_setup = vfio_load_setup,
 694    .load_cleanup = vfio_load_cleanup,
 695    .load_state = vfio_load_state,
 696};
 697
 698/* ---------------------------------------------------------------------- */
 699
 700static void vfio_vmstate_change(void *opaque, bool running, RunState state)
 701{
 702    VFIODevice *vbasedev = opaque;
 703    VFIOMigration *migration = vbasedev->migration;
 704    uint32_t value, mask;
 705    int ret;
 706
 707    if (vbasedev->migration->vm_running == running) {
 708        return;
 709    }
 710
 711    if (running) {
 712        /*
 713         * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
 714         * Transition from _SAVING to _RUNNING can happen if there is migration
 715         * failure, in that case clear _SAVING bit.
 716         * Transition from _RESUMING to _RUNNING occurs during resuming
 717         * phase, in that case clear _RESUMING bit.
 718         * In both the above cases, set _RUNNING bit.
 719         */
 720        mask = ~VFIO_DEVICE_STATE_MASK;
 721        value = VFIO_DEVICE_STATE_RUNNING;
 722    } else {
 723        /*
 724         * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
 725         * _RUNNING bit
 726         */
 727        mask = ~VFIO_DEVICE_STATE_RUNNING;
 728        value = 0;
 729    }
 730
 731    ret = vfio_migration_set_state(vbasedev, mask, value);
 732    if (ret) {
 733        /*
 734         * Migration should be aborted in this case, but vm_state_notify()
 735         * currently does not support reporting failures.
 736         */
 737        error_report("%s: Failed to set device state 0x%x", vbasedev->name,
 738                     (migration->device_state & mask) | value);
 739        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
 740    }
 741    vbasedev->migration->vm_running = running;
 742    trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
 743            (migration->device_state & mask) | value);
 744}
 745
 746static void vfio_migration_state_notifier(Notifier *notifier, void *data)
 747{
 748    MigrationState *s = data;
 749    VFIOMigration *migration = container_of(notifier, VFIOMigration,
 750                                            migration_state);
 751    VFIODevice *vbasedev = migration->vbasedev;
 752    int ret;
 753
 754    trace_vfio_migration_state_notifier(vbasedev->name,
 755                                        MigrationStatus_str(s->state));
 756
 757    switch (s->state) {
 758    case MIGRATION_STATUS_CANCELLING:
 759    case MIGRATION_STATUS_CANCELLED:
 760    case MIGRATION_STATUS_FAILED:
 761        bytes_transferred = 0;
 762        ret = vfio_migration_set_state(vbasedev,
 763                      ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
 764                      VFIO_DEVICE_STATE_RUNNING);
 765        if (ret) {
 766            error_report("%s: Failed to set state RUNNING", vbasedev->name);
 767        }
 768    }
 769}
 770
 771static void vfio_migration_exit(VFIODevice *vbasedev)
 772{
 773    VFIOMigration *migration = vbasedev->migration;
 774
 775    vfio_region_exit(&migration->region);
 776    vfio_region_finalize(&migration->region);
 777    g_free(vbasedev->migration);
 778    vbasedev->migration = NULL;
 779}
 780
 781static int vfio_migration_init(VFIODevice *vbasedev,
 782                               struct vfio_region_info *info)
 783{
 784    int ret;
 785    Object *obj;
 786    VFIOMigration *migration;
 787    char id[256] = "";
 788    g_autofree char *path = NULL, *oid = NULL;
 789
 790    if (!vbasedev->ops->vfio_get_object) {
 791        return -EINVAL;
 792    }
 793
 794    obj = vbasedev->ops->vfio_get_object(vbasedev);
 795    if (!obj) {
 796        return -EINVAL;
 797    }
 798
 799    vbasedev->migration = g_new0(VFIOMigration, 1);
 800
 801    ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
 802                            info->index, "migration");
 803    if (ret) {
 804        error_report("%s: Failed to setup VFIO migration region %d: %s",
 805                     vbasedev->name, info->index, strerror(-ret));
 806        goto err;
 807    }
 808
 809    if (!vbasedev->migration->region.size) {
 810        error_report("%s: Invalid zero-sized VFIO migration region %d",
 811                     vbasedev->name, info->index);
 812        ret = -EINVAL;
 813        goto err;
 814    }
 815
 816    migration = vbasedev->migration;
 817    migration->vbasedev = vbasedev;
 818
 819    oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
 820    if (oid) {
 821        path = g_strdup_printf("%s/vfio", oid);
 822    } else {
 823        path = g_strdup("vfio");
 824    }
 825    strpadcpy(id, sizeof(id), path, '\0');
 826
 827    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
 828                         vbasedev);
 829
 830    migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
 831                                                           vfio_vmstate_change,
 832                                                           vbasedev);
 833    migration->migration_state.notify = vfio_migration_state_notifier;
 834    add_migration_state_change_notifier(&migration->migration_state);
 835    return 0;
 836
 837err:
 838    vfio_migration_exit(vbasedev);
 839    return ret;
 840}
 841
 842/* ---------------------------------------------------------------------- */
 843
 844int64_t vfio_mig_bytes_transferred(void)
 845{
 846    return bytes_transferred;
 847}
 848
 849int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 850{
 851    VFIOContainer *container = vbasedev->group->container;
 852    struct vfio_region_info *info = NULL;
 853    Error *local_err = NULL;
 854    int ret = -ENOTSUP;
 855
 856    if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
 857        goto add_blocker;
 858    }
 859
 860    ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
 861                                   VFIO_REGION_SUBTYPE_MIGRATION, &info);
 862    if (ret) {
 863        goto add_blocker;
 864    }
 865
 866    ret = vfio_migration_init(vbasedev, info);
 867    if (ret) {
 868        goto add_blocker;
 869    }
 870
 871    trace_vfio_migration_probe(vbasedev->name, info->index);
 872    g_free(info);
 873    return 0;
 874
 875add_blocker:
 876    error_setg(&vbasedev->migration_blocker,
 877               "VFIO device doesn't support migration");
 878    g_free(info);
 879
 880    ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err);
 881    if (local_err) {
 882        error_propagate(errp, local_err);
 883        error_free(vbasedev->migration_blocker);
 884        vbasedev->migration_blocker = NULL;
 885    }
 886    return ret;
 887}
 888
 889void vfio_migration_finalize(VFIODevice *vbasedev)
 890{
 891    if (vbasedev->migration) {
 892        VFIOMigration *migration = vbasedev->migration;
 893
 894        remove_migration_state_change_notifier(&migration->migration_state);
 895        qemu_del_vm_change_state_handler(migration->vm_state);
 896        vfio_migration_exit(vbasedev);
 897    }
 898
 899    if (vbasedev->migration_blocker) {
 900        migrate_del_blocker(vbasedev->migration_blocker);
 901        error_free(vbasedev->migration_blocker);
 902        vbasedev->migration_blocker = NULL;
 903    }
 904}
 905