qemu/block/replication.c
<<
>>
Prefs
   1/*
   2 * Replication Block filter
   3 *
   4 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
   5 * Copyright (c) 2016 Intel Corporation
   6 * Copyright (c) 2016 FUJITSU LIMITED
   7 *
   8 * Author:
   9 *   Wen Congyang <wency@cn.fujitsu.com>
  10 *
  11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  12 * See the COPYING file in the top-level directory.
  13 */
  14
  15#include "qemu/osdep.h"
  16#include "qemu/module.h"
  17#include "qemu/option.h"
  18#include "block/nbd.h"
  19#include "block/blockjob.h"
  20#include "block/block_int.h"
  21#include "block/block_backup.h"
  22#include "sysemu/block-backend.h"
  23#include "qapi/error.h"
  24#include "qapi/qmp/qdict.h"
  25#include "replication.h"
  26
/*
 * Lifecycle stage of a replication node.  The stage decides which I/O
 * requests are accepted (see replication_get_io_status()) and which
 * start/checkpoint/stop requests are honoured.
 */
typedef enum {
    BLOCK_REPLICATION_NONE,             /* block replication is not started */
    BLOCK_REPLICATION_RUNNING,          /* block replication is running */
    BLOCK_REPLICATION_FAILOVER,         /* failover is running in background */
    BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
    BLOCK_REPLICATION_DONE,             /* block replication is done */
} ReplicationStage;
  34
/* Per-node driver state of the replication filter (bs->opaque). */
typedef struct BDRVReplicationState {
    /* Role selected by the "mode" option in replication_open() */
    ReplicationMode mode;
    /* Current lifecycle stage */
    ReplicationStage stage;
    /* Secondary only: bs->file, assigned in replication_start() */
    BdrvChild *active_disk;
    /* Active commit job started by replication_stop() on failover */
    BlockJob *commit_job;
    /* Secondary only: backing child of the active disk */
    BdrvChild *hidden_disk;
    /* Secondary only: backing child of the hidden disk */
    BdrvChild *secondary_disk;
    /* Internal backup job created in replication_start(); NULL when absent */
    BlockJob *backup_job;
    /* Copy of the "top-id" option; resolved with bdrv_lookup_bs() */
    char *top_id;
    /* Handle returned by replication_new() */
    ReplicationState *rs;
    /* Reason object used for the op blockers placed on the top node */
    Error *blocker;
    /* Read-only state of the hidden disk before it was reopened r/w */
    bool orig_hidden_read_only;
    /* Read-only state of the secondary disk before it was reopened r/w */
    bool orig_secondary_read_only;
    /* 0, or a negative errno recorded after an I/O or job failure */
    int error;
} BDRVReplicationState;
  50
  51static void replication_start(ReplicationState *rs, ReplicationMode mode,
  52                              Error **errp);
  53static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
  54static void replication_get_error(ReplicationState *rs, Error **errp);
  55static void replication_stop(ReplicationState *rs, bool failover,
  56                             Error **errp);
  57
/* Names of the runtime options understood by replication_open() */
#define REPLICATION_MODE        "mode"
#define REPLICATION_TOP_ID      "top-id"
/* Option list used to absorb the driver's options from the open QDict */
static QemuOptsList replication_runtime_opts = {
    .name = "replication",
    .head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head),
    .desc = {
        {
            /* "primary" or "secondary" */
            .name = REPLICATION_MODE,
            .type = QEMU_OPT_STRING,
        },
        {
            /* required in secondary mode, rejected in primary mode */
            .name = REPLICATION_TOP_ID,
            .type = QEMU_OPT_STRING,
        },
        { /* end of list */ }
    },
};
  75
/* Callbacks handed to replication_new() in replication_open() */
static ReplicationOps replication_ops = {
    .start = replication_start,
    .checkpoint = replication_do_checkpoint,
    .get_error = replication_get_error,
    .stop = replication_stop,
};
  82
  83static int replication_open(BlockDriverState *bs, QDict *options,
  84                            int flags, Error **errp)
  85{
  86    int ret;
  87    BDRVReplicationState *s = bs->opaque;
  88    QemuOpts *opts = NULL;
  89    const char *mode;
  90    const char *top_id;
  91
  92    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
  93                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
  94                               false, errp);
  95    if (!bs->file) {
  96        return -EINVAL;
  97    }
  98
  99    ret = -EINVAL;
 100    opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
 101    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
 102        goto fail;
 103    }
 104
 105    mode = qemu_opt_get(opts, REPLICATION_MODE);
 106    if (!mode) {
 107        error_setg(errp, "Missing the option mode");
 108        goto fail;
 109    }
 110
 111    if (!strcmp(mode, "primary")) {
 112        s->mode = REPLICATION_MODE_PRIMARY;
 113        top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
 114        if (top_id) {
 115            error_setg(errp,
 116                       "The primary side does not support option top-id");
 117            goto fail;
 118        }
 119    } else if (!strcmp(mode, "secondary")) {
 120        s->mode = REPLICATION_MODE_SECONDARY;
 121        top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
 122        s->top_id = g_strdup(top_id);
 123        if (!s->top_id) {
 124            error_setg(errp, "Missing the option top-id");
 125            goto fail;
 126        }
 127    } else {
 128        error_setg(errp,
 129                   "The option mode's value should be primary or secondary");
 130        goto fail;
 131    }
 132
 133    s->rs = replication_new(bs, &replication_ops);
 134
 135    ret = 0;
 136
 137fail:
 138    qemu_opts_del(opts);
 139    return ret;
 140}
 141
 142static void replication_close(BlockDriverState *bs)
 143{
 144    BDRVReplicationState *s = bs->opaque;
 145    Job *commit_job;
 146
 147    if (s->stage == BLOCK_REPLICATION_RUNNING) {
 148        replication_stop(s->rs, false, NULL);
 149    }
 150    if (s->stage == BLOCK_REPLICATION_FAILOVER) {
 151        commit_job = &s->commit_job->job;
 152        assert(commit_job->aio_context == qemu_get_current_aio_context());
 153        job_cancel_sync(commit_job);
 154    }
 155
 156    if (s->mode == REPLICATION_MODE_SECONDARY) {
 157        g_free(s->top_id);
 158    }
 159
 160    replication_remove(s->rs);
 161}
 162
 163static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
 164                                   BdrvChildRole role,
 165                                   BlockReopenQueue *reopen_queue,
 166                                   uint64_t perm, uint64_t shared,
 167                                   uint64_t *nperm, uint64_t *nshared)
 168{
 169    *nperm = BLK_PERM_CONSISTENT_READ;
 170    if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
 171        *nperm |= BLK_PERM_WRITE;
 172    }
 173    *nshared = BLK_PERM_CONSISTENT_READ
 174               | BLK_PERM_WRITE
 175               | BLK_PERM_WRITE_UNCHANGED;
 176    return;
 177}
 178
 179static int64_t replication_getlength(BlockDriverState *bs)
 180{
 181    return bdrv_getlength(bs->file->bs);
 182}
 183
 184static int replication_get_io_status(BDRVReplicationState *s)
 185{
 186    switch (s->stage) {
 187    case BLOCK_REPLICATION_NONE:
 188        return -EIO;
 189    case BLOCK_REPLICATION_RUNNING:
 190        return 0;
 191    case BLOCK_REPLICATION_FAILOVER:
 192        return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0;
 193    case BLOCK_REPLICATION_FAILOVER_FAILED:
 194        return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 1;
 195    case BLOCK_REPLICATION_DONE:
 196        /*
 197         * active commit job completes, and active disk and secondary_disk
 198         * is swapped, so we can operate bs->file directly
 199         */
 200        return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0;
 201    default:
 202        abort();
 203    }
 204}
 205
 206static int replication_return_value(BDRVReplicationState *s, int ret)
 207{
 208    if (s->mode == REPLICATION_MODE_SECONDARY) {
 209        return ret;
 210    }
 211
 212    if (ret < 0) {
 213        s->error = ret;
 214        ret = 0;
 215    }
 216
 217    return ret;
 218}
 219
 220static coroutine_fn int replication_co_readv(BlockDriverState *bs,
 221                                             int64_t sector_num,
 222                                             int remaining_sectors,
 223                                             QEMUIOVector *qiov)
 224{
 225    BDRVReplicationState *s = bs->opaque;
 226    int ret;
 227
 228    if (s->mode == REPLICATION_MODE_PRIMARY) {
 229        /* We only use it to forward primary write requests */
 230        return -EIO;
 231    }
 232
 233    ret = replication_get_io_status(s);
 234    if (ret < 0) {
 235        return ret;
 236    }
 237
 238    ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE,
 239                         remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
 240
 241    return replication_return_value(s, ret);
 242}
 243
 244static coroutine_fn int replication_co_writev(BlockDriverState *bs,
 245                                              int64_t sector_num,
 246                                              int remaining_sectors,
 247                                              QEMUIOVector *qiov,
 248                                              int flags)
 249{
 250    BDRVReplicationState *s = bs->opaque;
 251    QEMUIOVector hd_qiov;
 252    uint64_t bytes_done = 0;
 253    BdrvChild *top = bs->file;
 254    BdrvChild *base = s->secondary_disk;
 255    BdrvChild *target;
 256    int ret;
 257    int64_t n;
 258
 259    assert(!flags);
 260    ret = replication_get_io_status(s);
 261    if (ret < 0) {
 262        goto out;
 263    }
 264
 265    if (ret == 0) {
 266        ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE,
 267                              remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
 268        return replication_return_value(s, ret);
 269    }
 270
 271    /*
 272     * Failover failed, only write to active disk if the sectors
 273     * have already been allocated in active disk/hidden disk.
 274     */
 275    qemu_iovec_init(&hd_qiov, qiov->niov);
 276    while (remaining_sectors > 0) {
 277        int64_t count;
 278
 279        ret = bdrv_is_allocated_above(top->bs, base->bs, false,
 280                                      sector_num * BDRV_SECTOR_SIZE,
 281                                      remaining_sectors * BDRV_SECTOR_SIZE,
 282                                      &count);
 283        if (ret < 0) {
 284            goto out1;
 285        }
 286
 287        assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
 288        n = count >> BDRV_SECTOR_BITS;
 289        qemu_iovec_reset(&hd_qiov);
 290        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count);
 291
 292        target = ret ? top : base;
 293        ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE,
 294                              n * BDRV_SECTOR_SIZE, &hd_qiov, 0);
 295        if (ret < 0) {
 296            goto out1;
 297        }
 298
 299        remaining_sectors -= n;
 300        sector_num += n;
 301        bytes_done += count;
 302    }
 303
 304out1:
 305    qemu_iovec_destroy(&hd_qiov);
 306out:
 307    return ret;
 308}
 309
 310static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
 311{
 312    Error *local_err = NULL;
 313    int ret;
 314
 315    if (!s->backup_job) {
 316        error_setg(errp, "Backup job was cancelled unexpectedly");
 317        return;
 318    }
 319
 320    backup_do_checkpoint(s->backup_job, &local_err);
 321    if (local_err) {
 322        error_propagate(errp, local_err);
 323        return;
 324    }
 325
 326    if (!s->active_disk->bs->drv) {
 327        error_setg(errp, "Active disk %s is ejected",
 328                   s->active_disk->bs->node_name);
 329        return;
 330    }
 331
 332    ret = bdrv_make_empty(s->active_disk, errp);
 333    if (ret < 0) {
 334        return;
 335    }
 336
 337    if (!s->hidden_disk->bs->drv) {
 338        error_setg(errp, "Hidden disk %s is ejected",
 339                   s->hidden_disk->bs->node_name);
 340        return;
 341    }
 342
 343    BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
 344                                BLK_PERM_WRITE, BLK_PERM_ALL);
 345    blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
 346    if (local_err) {
 347        error_propagate(errp, local_err);
 348        blk_unref(blk);
 349        return;
 350    }
 351
 352    ret = blk_make_empty(blk, errp);
 353    blk_unref(blk);
 354    if (ret < 0) {
 355        return;
 356    }
 357}
 358
 359/* This function is supposed to be called twice:
 360 * first with writable = true, then with writable = false.
 361 * The first call puts s->hidden_disk and s->secondary_disk in
 362 * r/w mode, and the second puts them back in their original state.
 363 */
 364static void reopen_backing_file(BlockDriverState *bs, bool writable,
 365                                Error **errp)
 366{
 367    BDRVReplicationState *s = bs->opaque;
 368    BlockReopenQueue *reopen_queue = NULL;
 369
 370    if (writable) {
 371        s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs);
 372        s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs);
 373    }
 374
 375    bdrv_subtree_drained_begin(s->hidden_disk->bs);
 376    bdrv_subtree_drained_begin(s->secondary_disk->bs);
 377
 378    if (s->orig_hidden_read_only) {
 379        QDict *opts = qdict_new();
 380        qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
 381        reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs,
 382                                         opts, true);
 383    }
 384
 385    if (s->orig_secondary_read_only) {
 386        QDict *opts = qdict_new();
 387        qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
 388        reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs,
 389                                         opts, true);
 390    }
 391
 392    if (reopen_queue) {
 393        bdrv_reopen_multiple(reopen_queue, errp);
 394    }
 395
 396    bdrv_subtree_drained_end(s->hidden_disk->bs);
 397    bdrv_subtree_drained_end(s->secondary_disk->bs);
 398}
 399
 400static void backup_job_cleanup(BlockDriverState *bs)
 401{
 402    BDRVReplicationState *s = bs->opaque;
 403    BlockDriverState *top_bs;
 404
 405    s->backup_job = NULL;
 406
 407    top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
 408    if (!top_bs) {
 409        return;
 410    }
 411    bdrv_op_unblock_all(top_bs, s->blocker);
 412    error_free(s->blocker);
 413    reopen_backing_file(bs, false, NULL);
 414}
 415
 416static void backup_job_completed(void *opaque, int ret)
 417{
 418    BlockDriverState *bs = opaque;
 419    BDRVReplicationState *s = bs->opaque;
 420
 421    if (s->stage != BLOCK_REPLICATION_FAILOVER) {
 422        /* The backup job is cancelled unexpectedly */
 423        s->error = -EIO;
 424    }
 425
 426    backup_job_cleanup(bs);
 427}
 428
 429static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
 430{
 431    BdrvChild *child;
 432
 433    /* The bs itself is the top_bs */
 434    if (top_bs == bs) {
 435        return true;
 436    }
 437
 438    /* Iterate over top_bs's children */
 439    QLIST_FOREACH(child, &top_bs->children, next) {
 440        if (child->bs == bs || check_top_bs(child->bs, bs)) {
 441            return true;
 442        }
 443    }
 444
 445    return false;
 446}
 447
 448static void replication_start(ReplicationState *rs, ReplicationMode mode,
 449                              Error **errp)
 450{
 451    BlockDriverState *bs = rs->opaque;
 452    BDRVReplicationState *s;
 453    BlockDriverState *top_bs;
 454    int64_t active_length, hidden_length, disk_length;
 455    AioContext *aio_context;
 456    Error *local_err = NULL;
 457    BackupPerf perf = { .use_copy_range = true, .max_workers = 1 };
 458
 459    aio_context = bdrv_get_aio_context(bs);
 460    aio_context_acquire(aio_context);
 461    s = bs->opaque;
 462
 463    if (s->stage == BLOCK_REPLICATION_DONE ||
 464        s->stage == BLOCK_REPLICATION_FAILOVER) {
 465        /*
 466         * This case happens when a secondary is promoted to primary.
 467         * Ignore the request because the secondary side of replication
 468         * doesn't have to do anything anymore.
 469         */
 470        aio_context_release(aio_context);
 471        return;
 472    }
 473
 474    if (s->stage != BLOCK_REPLICATION_NONE) {
 475        error_setg(errp, "Block replication is running or done");
 476        aio_context_release(aio_context);
 477        return;
 478    }
 479
 480    if (s->mode != mode) {
 481        error_setg(errp, "The parameter mode's value is invalid, needs %d,"
 482                   " but got %d", s->mode, mode);
 483        aio_context_release(aio_context);
 484        return;
 485    }
 486
 487    switch (s->mode) {
 488    case REPLICATION_MODE_PRIMARY:
 489        break;
 490    case REPLICATION_MODE_SECONDARY:
 491        s->active_disk = bs->file;
 492        if (!s->active_disk || !s->active_disk->bs ||
 493                                    !s->active_disk->bs->backing) {
 494            error_setg(errp, "Active disk doesn't have backing file");
 495            aio_context_release(aio_context);
 496            return;
 497        }
 498
 499        s->hidden_disk = s->active_disk->bs->backing;
 500        if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) {
 501            error_setg(errp, "Hidden disk doesn't have backing file");
 502            aio_context_release(aio_context);
 503            return;
 504        }
 505
 506        s->secondary_disk = s->hidden_disk->bs->backing;
 507        if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) {
 508            error_setg(errp, "The secondary disk doesn't have block backend");
 509            aio_context_release(aio_context);
 510            return;
 511        }
 512
 513        /* verify the length */
 514        active_length = bdrv_getlength(s->active_disk->bs);
 515        hidden_length = bdrv_getlength(s->hidden_disk->bs);
 516        disk_length = bdrv_getlength(s->secondary_disk->bs);
 517        if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
 518            active_length != hidden_length || hidden_length != disk_length) {
 519            error_setg(errp, "Active disk, hidden disk, secondary disk's length"
 520                       " are not the same");
 521            aio_context_release(aio_context);
 522            return;
 523        }
 524
 525        /* Must be true, or the bdrv_getlength() calls would have failed */
 526        assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
 527
 528        if (!s->active_disk->bs->drv->bdrv_make_empty ||
 529            !s->hidden_disk->bs->drv->bdrv_make_empty) {
 530            error_setg(errp,
 531                       "Active disk or hidden disk doesn't support make_empty");
 532            aio_context_release(aio_context);
 533            return;
 534        }
 535
 536        /* reopen the backing file in r/w mode */
 537        reopen_backing_file(bs, true, &local_err);
 538        if (local_err) {
 539            error_propagate(errp, local_err);
 540            aio_context_release(aio_context);
 541            return;
 542        }
 543
 544        /* start backup job now */
 545        error_setg(&s->blocker,
 546                   "Block device is in use by internal backup job");
 547
 548        top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
 549        if (!top_bs || !bdrv_is_root_node(top_bs) ||
 550            !check_top_bs(top_bs, bs)) {
 551            error_setg(errp, "No top_bs or it is invalid");
 552            reopen_backing_file(bs, false, NULL);
 553            aio_context_release(aio_context);
 554            return;
 555        }
 556        bdrv_op_block_all(top_bs, s->blocker);
 557        bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
 558
 559        s->backup_job = backup_job_create(
 560                                NULL, s->secondary_disk->bs, s->hidden_disk->bs,
 561                                0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
 562                                &perf,
 563                                BLOCKDEV_ON_ERROR_REPORT,
 564                                BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
 565                                backup_job_completed, bs, NULL, &local_err);
 566        if (local_err) {
 567            error_propagate(errp, local_err);
 568            backup_job_cleanup(bs);
 569            aio_context_release(aio_context);
 570            return;
 571        }
 572        job_start(&s->backup_job->job);
 573        break;
 574    default:
 575        aio_context_release(aio_context);
 576        abort();
 577    }
 578
 579    s->stage = BLOCK_REPLICATION_RUNNING;
 580
 581    if (s->mode == REPLICATION_MODE_SECONDARY) {
 582        secondary_do_checkpoint(s, errp);
 583    }
 584
 585    s->error = 0;
 586    aio_context_release(aio_context);
 587}
 588
 589static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
 590{
 591    BlockDriverState *bs = rs->opaque;
 592    BDRVReplicationState *s;
 593    AioContext *aio_context;
 594
 595    aio_context = bdrv_get_aio_context(bs);
 596    aio_context_acquire(aio_context);
 597    s = bs->opaque;
 598
 599    if (s->stage == BLOCK_REPLICATION_DONE ||
 600        s->stage == BLOCK_REPLICATION_FAILOVER) {
 601        /*
 602         * This case happens when a secondary was promoted to primary.
 603         * Ignore the request because the secondary side of replication
 604         * doesn't have to do anything anymore.
 605         */
 606        aio_context_release(aio_context);
 607        return;
 608    }
 609
 610    if (s->mode == REPLICATION_MODE_SECONDARY) {
 611        secondary_do_checkpoint(s, errp);
 612    }
 613    aio_context_release(aio_context);
 614}
 615
 616static void replication_get_error(ReplicationState *rs, Error **errp)
 617{
 618    BlockDriverState *bs = rs->opaque;
 619    BDRVReplicationState *s;
 620    AioContext *aio_context;
 621
 622    aio_context = bdrv_get_aio_context(bs);
 623    aio_context_acquire(aio_context);
 624    s = bs->opaque;
 625
 626    if (s->stage == BLOCK_REPLICATION_NONE) {
 627        error_setg(errp, "Block replication is not running");
 628        aio_context_release(aio_context);
 629        return;
 630    }
 631
 632    if (s->error) {
 633        error_setg(errp, "I/O error occurred");
 634        aio_context_release(aio_context);
 635        return;
 636    }
 637    aio_context_release(aio_context);
 638}
 639
 640static void replication_done(void *opaque, int ret)
 641{
 642    BlockDriverState *bs = opaque;
 643    BDRVReplicationState *s = bs->opaque;
 644
 645    if (ret == 0) {
 646        s->stage = BLOCK_REPLICATION_DONE;
 647
 648        s->active_disk = NULL;
 649        s->secondary_disk = NULL;
 650        s->hidden_disk = NULL;
 651        s->error = 0;
 652    } else {
 653        s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
 654        s->error = -EIO;
 655    }
 656}
 657
 658static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
 659{
 660    BlockDriverState *bs = rs->opaque;
 661    BDRVReplicationState *s;
 662    AioContext *aio_context;
 663
 664    aio_context = bdrv_get_aio_context(bs);
 665    aio_context_acquire(aio_context);
 666    s = bs->opaque;
 667
 668    if (s->stage == BLOCK_REPLICATION_DONE ||
 669        s->stage == BLOCK_REPLICATION_FAILOVER) {
 670        /*
 671         * This case happens when a secondary was promoted to primary.
 672         * Ignore the request because the secondary side of replication
 673         * doesn't have to do anything anymore.
 674         */
 675        aio_context_release(aio_context);
 676        return;
 677    }
 678
 679    if (s->stage != BLOCK_REPLICATION_RUNNING) {
 680        error_setg(errp, "Block replication is not running");
 681        aio_context_release(aio_context);
 682        return;
 683    }
 684
 685    switch (s->mode) {
 686    case REPLICATION_MODE_PRIMARY:
 687        s->stage = BLOCK_REPLICATION_DONE;
 688        s->error = 0;
 689        break;
 690    case REPLICATION_MODE_SECONDARY:
 691        /*
 692         * This BDS will be closed, and the job should be completed
 693         * before the BDS is closed, because we will access hidden
 694         * disk, secondary disk in backup_job_completed().
 695         */
 696        if (s->backup_job) {
 697            job_cancel_sync(&s->backup_job->job);
 698        }
 699
 700        if (!failover) {
 701            secondary_do_checkpoint(s, errp);
 702            s->stage = BLOCK_REPLICATION_DONE;
 703            aio_context_release(aio_context);
 704            return;
 705        }
 706
 707        s->stage = BLOCK_REPLICATION_FAILOVER;
 708        s->commit_job = commit_active_start(
 709                            NULL, s->active_disk->bs, s->secondary_disk->bs,
 710                            JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
 711                            NULL, replication_done, bs, true, errp);
 712        break;
 713    default:
 714        aio_context_release(aio_context);
 715        abort();
 716    }
 717    aio_context_release(aio_context);
 718}
 719
/* NULL-terminated list of option names exported as strong_runtime_opts */
static const char *const replication_strong_runtime_opts[] = {
    REPLICATION_MODE,
    REPLICATION_TOP_ID,

    NULL
};
 726
/* Block driver description for the "replication" filter */
static BlockDriver bdrv_replication = {
    .format_name                = "replication",
    .instance_size              = sizeof(BDRVReplicationState),

    .bdrv_open                  = replication_open,
    .bdrv_close                 = replication_close,
    .bdrv_child_perm            = replication_child_perm,

    /* Length and sector-based I/O are delegated to bs->file */
    .bdrv_getlength             = replication_getlength,
    .bdrv_co_readv              = replication_co_readv,
    .bdrv_co_writev             = replication_co_writev,

    .is_filter                  = true,

    .has_variable_length        = true,
    .strong_runtime_opts        = replication_strong_runtime_opts,
};
 744
/* Register the replication driver with the block layer */
static void bdrv_replication_init(void)
{
    bdrv_register(&bdrv_replication);
}

/* Hook the registration function into block-module initialisation */
block_init(bdrv_replication_init);
 751