qemu/block-migration.c
/*
 * QEMU live block migration
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Liran Schour   <lirans@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "block/block_int.h"
#include "hw/hw.h"
#include "qemu/queue.h"
#include "qemu/timer.h"
#include "migration/block.h"
#include "migration/migration.h"
#include "sysemu/blockdev.h"
#include <assert.h>

#define BLOCK_SIZE                       (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)

#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04

#define MAX_IS_ALLOCATED_SEARCH 65536

//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

typedef struct BlkMigDevState {
    /* Written during setup phase.  Can be read without a lock.  */
    BlockDriverState *bs;
    int shared_base;
    int64_t total_sectors;
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;

    /* Only used by migration thread.  Does not need a lock.  */
    int bulk_completed;
    int64_t cur_sector;
    int64_t cur_dirty;

    /* Protected by block migration lock.  */
    unsigned long *aio_bitmap;
    int64_t completed_sectors;
} BlkMigDevState;

typedef struct BlkMigBlock {
    /* Only used by migration thread.  */
    uint8_t *buf;
    BlkMigDevState *bmds;
    int64_t sector;
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockDriverAIOCB *aiocb;

    /* Protected by block migration lock.  */
    int ret;
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;

typedef struct BlkMigState {
    /* Written during setup phase.  Can be read without a lock.  */
    int blk_enable;
    int shared_base;
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
    int64_t total_sector_sum;

    /* Protected by lock.  */
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
    int submitted;
    int read_done;

    /* Only used by migration thread.  Does not need a lock.  */
    int transferred;
    int prev_progress;
    int bulk_completed;

    /* Lock must be taken _inside_ the iothread lock.  */
    QemuMutex lock;
} BlkMigState;

static BlkMigState block_mig_state;

static void blk_mig_lock(void)
{
    qemu_mutex_lock(&block_mig_state.lock);
}

static void blk_mig_unlock(void)
{
    qemu_mutex_unlock(&block_mig_state.lock);
}

/* Must run outside of the iothread lock during the bulk phase,
 * or the VM will stall.
 */

static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
    int len;

    /* sector number and flags */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     | BLK_MIG_FLAG_DEVICE_BLOCK);

    /* device name */
    len = strlen(blk->bmds->bs->device_name);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);

    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}

int blk_mig_active(void)
{
    return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
}

uint64_t blk_mig_bytes_transferred(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    blk_mig_lock();
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->completed_sectors;
    }
    blk_mig_unlock();
    return sum << BDRV_SECTOR_BITS;
}

uint64_t blk_mig_bytes_remaining(void)
{
    return blk_mig_bytes_total() - blk_mig_bytes_transferred();
}

uint64_t blk_mig_bytes_total(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->total_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}


/* Called with migration lock held.  */

static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

/* Called with migration lock held.  */

static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
                             int nb_sectors, int set)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bmds->aio_bitmap[idx];
        if (set) {
            val |= 1UL << bit;
        } else {
            val &= ~(1UL << bit);
        }
        bmds->aio_bitmap[idx] = val;
    }
}

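/* Allocate one bit per BDRV_SECTORS_PER_DIRTY_CHUNK chunk of the device so
 * in-flight AIO reads can be tracked per chunk.
 */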
static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
    BlockDriverState *bs = bmds->bs;
    int64_t bitmap_size;

    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

    bmds->aio_bitmap = g_malloc0(bitmap_size);
}

/* Never hold migration lock when yielding to the main loop!  */

static void blk_mig_read_cb(void *opaque, int ret)
{
    BlkMigBlock *blk = opaque;

    blk_mig_lock();
    blk->ret = ret;

    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);

    block_mig_state.submitted--;
    block_mig_state.read_done++;
    assert(block_mig_state.submitted >= 0);
    blk_mig_unlock();
}

/* Called with no lock taken.  */

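/* Queue an asynchronous read of the next bulk chunk for one device.
 * Returns 1 when the bulk phase for this device is finished.
 */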
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
    int64_t total_sectors = bmds->total_sectors;
    int64_t cur_sector = bmds->cur_sector;
    BlockDriverState *bs = bmds->bs;
    BlkMigBlock *blk;
    int nr_sectors;

    if (bmds->shared_base) {
        qemu_mutex_lock_iothread();
        while (cur_sector < total_sectors &&
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                                  &nr_sectors)) {
            cur_sector += nr_sectors;
        }
        qemu_mutex_unlock_iothread();
    }

    if (cur_sector >= total_sectors) {
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
        return 1;
    }

    bmds->completed_sectors = cur_sector;

    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);

    /* we are going to transfer a full block even if it is not allocated */
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
        nr_sectors = total_sectors - cur_sector;
    }

    blk = g_malloc(sizeof(BlkMigBlock));
    blk->buf = g_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
    blk->nr_sectors = nr_sectors;

    blk->iov.iov_base = blk->buf;
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

    blk_mig_lock();
    block_mig_state.submitted++;
    blk_mig_unlock();

    qemu_mutex_lock_iothread();
    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                nr_sectors, blk_mig_read_cb, blk);

    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
    qemu_mutex_unlock_iothread();

    bmds->cur_sector = cur_sector + nr_sectors;
    return (bmds->cur_sector >= total_sectors);
}

/* Called with iothread lock taken.  */

static void set_dirty_tracking(int enable)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
    }
}

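/* bdrv_iterate() callback: register each writable block device that has a
 * positive length for migration.
 */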
static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
{
    BlkMigDevState *bmds;
    int64_t sectors;

    if (!bdrv_is_read_only(bs)) {
        sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
        if (sectors <= 0) {
            return;
        }

        bmds = g_malloc0(sizeof(BlkMigDevState));
        bmds->bs = bs;
        bmds->bulk_completed = 0;
        bmds->total_sectors = sectors;
        bmds->completed_sectors = 0;
        bmds->shared_base = block_mig_state.shared_base;
        alloc_aio_bitmap(bmds);
        drive_get_ref(drive_get_by_blockdev(bs));
        bdrv_set_in_use(bs, 1);

        block_mig_state.total_sector_sum += sectors;

        if (bmds->shared_base) {
            DPRINTF("Start migration for %s with shared base image\n",
                    bs->device_name);
        } else {
            DPRINTF("Start full migration for %s\n", bs->device_name);
        }

        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }
}

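/* Reset the global counters and build the list of devices to migrate.  */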
static void init_blk_migration(QEMUFile *f)
{
    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;

    bdrv_iterate(init_blk_migration_it, NULL);
}

/* Called with no lock taken.  */

static int blk_mig_save_bulked_block(QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        progress = 100;
    }
    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        DPRINTF("Completed %d %%\r", progress);
    }

    return ret;
}

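/* Restart the dirty-sector scan from the start of every device.  */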
static void blk_mig_reset_dirty_cursor(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->cur_dirty = 0;
    }
}

/* Called with iothread lock taken.  */

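/* Transfer at most one dirty chunk of the given device.  Returns 1 when the
 * dirty scan for this device has reached the end, 0 if there is more to do,
 * and a negative value on a synchronous read error.
 */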
static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                 int is_async)
{
    BlkMigBlock *blk;
    int64_t total_sectors = bmds->total_sectors;
    int64_t sector;
    int nr_sectors;
    int ret = -EIO;

    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
        blk_mig_lock();
        if (bmds_aio_inflight(bmds, sector)) {
            blk_mig_unlock();
            bdrv_drain_all();
        } else {
            blk_mig_unlock();
        }
        if (bdrv_get_dirty(bmds->bs, sector)) {

            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - sector;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
            blk = g_malloc(sizeof(BlkMigBlock));
            blk->buf = g_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
            blk->nr_sectors = nr_sectors;

            if (is_async) {
                blk->iov.iov_base = blk->buf;
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                            nr_sectors, blk_mig_read_cb, blk);

                blk_mig_lock();
                block_mig_state.submitted++;
                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
                blk_mig_unlock();
            } else {
                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
                if (ret < 0) {
                    goto error;
                }
                blk_send(f, blk);

                g_free(blk->buf);
                g_free(blk);
            }

            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
            break;
        }
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
        bmds->cur_dirty = sector;
    }

    return (bmds->cur_dirty >= bmds->total_sectors);

error:
    DPRINTF("Error reading sector %" PRId64 "\n", sector);
    g_free(blk->buf);
    g_free(blk);
    return ret;
}

/* Called with iothread lock taken.
 *
 * return value:
 * 0: too much data for max_downtime
 * 1: few enough data for max_downtime
 */
static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
{
    BlkMigDevState *bmds;
    int ret = 1;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        ret = mig_save_device_dirty(f, bmds, is_async);
        if (ret <= 0) {
            break;
        }
    }

    return ret;
}

/* Called with no locks taken.  */

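/* Send the reads that have completed so far to the migration stream, stopping
 * when the rate limit is hit or a read reported an error.
 */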
static int flush_blks(QEMUFile *f)
{
    BlkMigBlock *blk;
    int ret = 0;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    blk_mig_lock();
    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        if (qemu_file_rate_limit(f)) {
            break;
        }
        if (blk->ret < 0) {
            ret = blk->ret;
            break;
        }

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        blk_mig_unlock();
        blk_send(f, blk);
        blk_mig_lock();

        g_free(blk->buf);
        g_free(blk);

        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }
    blk_mig_unlock();

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
    return ret;
}

/* Called with iothread lock taken.  */

static int64_t get_remaining_dirty(void)
{
    BlkMigDevState *bmds;
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        dirty += bdrv_get_dirty_count(bmds->bs);
    }

    return dirty << BDRV_SECTOR_BITS;
}

/* Called with iothread lock taken.  */

static void blk_mig_cleanup(void)
{
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

    bdrv_drain_all();

    set_dirty_tracking(0);

    blk_mig_lock();
    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        bdrv_set_in_use(bmds->bs, 0);
        drive_put_ref(drive_get_by_blockdev(bmds->bs));
        g_free(bmds->aio_bitmap);
        g_free(bmds);
    }

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
        g_free(blk);
    }
    blk_mig_unlock();
}

static void block_migration_cancel(void *opaque)
{
    blk_mig_cleanup();
}

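/* .save_live_setup handler: enumerate the block devices and enable dirty
 * tracking before the iterative phase starts.
 */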
static int block_save_setup(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live setup submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    qemu_mutex_lock_iothread();
    init_blk_migration(f);

    /* start tracking dirty blocks */
    set_dirty_tracking(1);
    qemu_mutex_unlock_iothread();

    ret = flush_blks(f);
    blk_mig_reset_dirty_cursor();
    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return ret;
}

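/* .save_live_iterate handler: first drive the bulk phase, then send dirty
 * blocks, until the bandwidth limit for this iteration is reached.
 */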
static int block_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int64_t last_ftell = qemu_ftell(f);

    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* control the rate of transfer */
    blk_mig_lock();
    while ((block_mig_state.submitted +
            block_mig_state.read_done) * BLOCK_SIZE <
           qemu_file_get_rate_limit(f)) {
        blk_mig_unlock();
        if (block_mig_state.bulk_completed == 0) {
            /* first finish the bulk phase */
            if (blk_mig_save_bulked_block(f) == 0) {
                /* finished saving bulk on all devices */
                block_mig_state.bulk_completed = 1;
            }
            ret = 0;
        } else {
            /* Always called with iothread lock taken for
             * simplicity, block_save_complete also calls it.
             */
            qemu_mutex_lock_iothread();
            ret = blk_mig_save_dirty_block(f, 1);
            qemu_mutex_unlock_iothread();
        }
        if (ret < 0) {
            return ret;
        }
        blk_mig_lock();
        if (ret != 0) {
            /* no more dirty blocks */
            break;
        }
    }
    blk_mig_unlock();

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);
    return qemu_ftell(f) - last_ftell;
}

/* Called with iothread lock taken.  */

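/* .save_live_complete handler: flush the outstanding reads and send every
 * remaining dirty block synchronously, then clean up.
 */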
static int block_save_complete(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live complete submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* we know for sure that the bulk phase is completed and
       all async reads have completed */
    blk_mig_lock();
    assert(block_mig_state.submitted == 0);
    blk_mig_unlock();

    do {
        ret = blk_mig_save_dirty_block(f, 0);
        if (ret < 0) {
            return ret;
        }
    } while (ret == 0);

    /* report completion */
    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

    DPRINTF("Block migration completed\n");

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    blk_mig_cleanup();
    return 0;
}

static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    /* Estimate pending number of bytes to send */
    uint64_t pending;

    qemu_mutex_lock_iothread();
    blk_mig_lock();
    pending = get_remaining_dirty() +
                       block_mig_state.submitted * BLOCK_SIZE +
                       block_mig_state.read_done * BLOCK_SIZE;

    /* Report at least one block pending during bulk phase */
    if (pending == 0 && !block_mig_state.bulk_completed) {
        pending = BLOCK_SIZE;
    }
    blk_mig_unlock();
    qemu_mutex_unlock_iothread();

    DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
    return pending;
}

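/* .load_state handler on the destination: read the device name, the sector
 * offset and the data for each block and write it to the matching device.
 */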
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    BlockDriverState *bs, *bs_prev = NULL;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
    int ret;

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            /* get device name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            bs = bdrv_find(device_name);
            if (!bs) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            if (bs != bs_prev) {
                bs_prev = bs;
                total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }
            }

            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            buf = g_malloc(BLOCK_SIZE);

            qemu_get_buffer(f, buf, BLOCK_SIZE);
            ret = bdrv_write(bs, addr, buf, nr_sectors);

            g_free(buf);
            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
            return -EINVAL;
        }
        ret = qemu_file_get_error(f);
        if (ret != 0) {
            return ret;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}

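/* .set_params handler: record whether block migration was requested and
 * whether only data on top of a shared base image should be transferred.
 */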
static void block_set_params(const MigrationParams *params, void *opaque)
{
    block_mig_state.blk_enable = params->blk;
    block_mig_state.shared_base = params->shared;

    /* shared base means that blk_enable = 1 */
    block_mig_state.blk_enable |= params->shared;
}

static bool block_is_active(void *opaque)
{
    return block_mig_state.blk_enable == 1;
}

SaveVMHandlers savevm_block_handlers = {
    .set_params = block_set_params,
    .save_live_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
    .save_live_complete = block_save_complete,
    .save_live_pending = block_save_pending,
    .load_state = block_load,
    .cancel = block_migration_cancel,
    .is_active = block_is_active,
};

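/* Register the live block migration handlers with the savevm infrastructure.  */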
void blk_mig_init(void)
{
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
    QSIMPLEQ_INIT(&block_mig_state.blk_list);
    qemu_mutex_init(&block_mig_state.lock);

    register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
                         &block_mig_state);
}