qemu/block/block-backend.c
   1/*
   2 * QEMU Block backends
   3 *
   4 * Copyright (C) 2014-2016 Red Hat, Inc.
   5 *
   6 * Authors:
   7 *  Markus Armbruster <armbru@redhat.com>,
   8 *
   9 * This work is licensed under the terms of the GNU LGPL, version 2.1
  10 * or later.  See the COPYING.LIB file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "sysemu/block-backend.h"
  15#include "block/block_int.h"
  16#include "block/blockjob.h"
  17#include "block/coroutines.h"
  18#include "block/throttle-groups.h"
  19#include "hw/qdev-core.h"
  20#include "sysemu/blockdev.h"
  21#include "sysemu/runstate.h"
  22#include "sysemu/replay.h"
  23#include "qapi/error.h"
  24#include "qapi/qapi-events-block.h"
  25#include "qemu/id.h"
  26#include "qemu/main-loop.h"
  27#include "qemu/option.h"
  28#include "trace.h"
  29#include "migration/misc.h"
  30
  31/* Number of coroutines to reserve per attached device model */
  32#define COROUTINE_POOL_RESERVATION 64
  33
  34#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
  35
  36static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
  37
  38typedef struct BlockBackendAioNotifier {
  39    void (*attached_aio_context)(AioContext *new_context, void *opaque);
  40    void (*detach_aio_context)(void *opaque);
  41    void *opaque;
  42    QLIST_ENTRY(BlockBackendAioNotifier) list;
  43} BlockBackendAioNotifier;
  44
  45struct BlockBackend {
  46    char *name;
  47    int refcnt;
  48    BdrvChild *root;
  49    AioContext *ctx;
  50    DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
  51    QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
  52    QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
  53    BlockBackendPublic public;
  54
  55    DeviceState *dev;           /* attached device model, if any */
  56    const BlockDevOps *dev_ops;
  57    void *dev_opaque;
  58
  59    /* If the BDS tree is removed, some of its options are stored here (which
  60     * can be used to restore those options in the new BDS on insert) */
  61    BlockBackendRootState root_state;
  62
  63    bool enable_write_cache;
  64
  65    /* I/O stats (display with "info blockstats"). */
  66    BlockAcctStats stats;
  67
  68    BlockdevOnError on_read_error, on_write_error;
  69    bool iostatus_enabled;
  70    BlockDeviceIoStatus iostatus;
  71
  72    uint64_t perm;
  73    uint64_t shared_perm;
  74    bool disable_perm;
  75
  76    bool allow_aio_context_change;
  77    bool allow_write_beyond_eof;
  78
  79    /* Protected by BQL */
  80    NotifierList remove_bs_notifiers, insert_bs_notifiers;
  81    QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
  82
  83    int quiesce_counter; /* atomic: written under BQL, read by other threads */
  84    QemuMutex queued_requests_lock; /* protects queued_requests */
  85    CoQueue queued_requests;
  86    bool disable_request_queuing; /* atomic */
  87
  88    VMChangeStateEntry *vmsh;
  89    bool force_allow_inactivate;
  90
  91    /* Number of in-flight aio requests.  BlockDriverState also counts
  92     * in-flight requests but aio requests can exist even when blk->root is
  93     * NULL, so we cannot rely on its counter for that case.
  94     * Accessed with atomic ops.
  95     */
  96    unsigned int in_flight;
  97};
  98
  99typedef struct BlockBackendAIOCB {
 100    BlockAIOCB common;
 101    BlockBackend *blk;
 102    int ret;
 103} BlockBackendAIOCB;
 104
 105static const AIOCBInfo block_backend_aiocb_info = {
 106    .get_aio_context = blk_aiocb_get_aio_context,
 107    .aiocb_size = sizeof(BlockBackendAIOCB),
 108};
 109
 110static void drive_info_del(DriveInfo *dinfo);
 111static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
 112
 113/* All BlockBackends. Protected by BQL. */
 114static QTAILQ_HEAD(, BlockBackend) block_backends =
 115    QTAILQ_HEAD_INITIALIZER(block_backends);
 116
 117/*
 118 * All BlockBackends referenced by the monitor, i.e. the ones returned by
 119 * blk_next(). Protected by BQL.
 120 */
 121static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
 122    QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
 123
 124static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
 125                                     int *child_flags, QDict *child_options,
 126                                     int parent_flags, QDict *parent_options)
 127{
 128    /* We're not supposed to call this function for root nodes */
 129    abort();
 130}
 131static void blk_root_drained_begin(BdrvChild *child);
 132static bool blk_root_drained_poll(BdrvChild *child);
 133static void blk_root_drained_end(BdrvChild *child);
 134
 135static void blk_root_change_media(BdrvChild *child, bool load);
 136static void blk_root_resize(BdrvChild *child);
 137
 138static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
 139                                    GHashTable *visited, Transaction *tran,
 140                                    Error **errp);
 141
 142static char *blk_root_get_parent_desc(BdrvChild *child)
 143{
 144    BlockBackend *blk = child->opaque;
 145    g_autofree char *dev_id = NULL;
 146
 147    if (blk->name) {
 148        return g_strdup_printf("block device '%s'", blk->name);
 149    }
 150
 151    dev_id = blk_get_attached_dev_id(blk);
 152    if (*dev_id) {
 153        return g_strdup_printf("block device '%s'", dev_id);
 154    } else {
 155        /* TODO Callback into the BB owner for something more detailed */
 156        return g_strdup("an unnamed block device");
 157    }
 158}
 159
 160static const char *blk_root_get_name(BdrvChild *child)
 161{
 162    return blk_name(child->opaque);
 163}
 164
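    /*
     * VM change state handler installed by blk_root_activate() when activation
     * happens while an incoming migration is still in progress; it applies the
     * deferred shared_perm update once that run state is left again.
     */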
 165static void blk_vm_state_changed(void *opaque, bool running, RunState state)
 166{
 167    Error *local_err = NULL;
 168    BlockBackend *blk = opaque;
 169
 170    if (state == RUN_STATE_INMIGRATE) {
 171        return;
 172    }
 173
 174    qemu_del_vm_change_state_handler(blk->vmsh);
 175    blk->vmsh = NULL;
 176    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
 177    if (local_err) {
 178        error_report_err(local_err);
 179    }
 180}
 181
 182/*
 183 * Notifies the user of the BlockBackend that migration has completed. qdev
 184 * devices can tighten their permissions in response (specifically revoke
 185 * shared write permissions that we needed for storage migration).
 186 *
 187 * If an error is returned, the VM must not be resumed.
 188 */
 189static void blk_root_activate(BdrvChild *child, Error **errp)
 190{
 191    BlockBackend *blk = child->opaque;
 192    Error *local_err = NULL;
 193    uint64_t saved_shared_perm;
 194
 195    if (!blk->disable_perm) {
 196        return;
 197    }
 198
 199    blk->disable_perm = false;
 200
 201    /*
 202     * blk->shared_perm contains the permissions we want to share once
 203     * migration is really completely done.  For now, we need to share
 204     * all; but we also need to retain blk->shared_perm, which is
 205     * overwritten by a successful blk_set_perm() call.  Save it and
 206     * restore it below.
 207     */
 208    saved_shared_perm = blk->shared_perm;
 209
 210    blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
 211    if (local_err) {
 212        error_propagate(errp, local_err);
 213        blk->disable_perm = true;
 214        return;
 215    }
 216    blk->shared_perm = saved_shared_perm;
 217
 218    if (runstate_check(RUN_STATE_INMIGRATE)) {
 219        /* Activation can happen when migration process is still active, for
 220         * example when nbd_server_add is called during non-shared storage
 221         * migration. Defer the shared_perm update to migration completion. */
 222        if (!blk->vmsh) {
 223            blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed,
 224                                                         blk);
 225        }
 226        return;
 227    }
 228
 229    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
 230    if (local_err) {
 231        error_propagate(errp, local_err);
 232        blk->disable_perm = true;
 233        return;
 234    }
 235}
 236
 237void blk_set_force_allow_inactivate(BlockBackend *blk)
 238{
 239    GLOBAL_STATE_CODE();
 240    blk->force_allow_inactivate = true;
 241}
 242
 243static bool blk_can_inactivate(BlockBackend *blk)
 244{
 245    /* If it is a guest device, inactivate is ok. */
 246    if (blk->dev || blk_name(blk)[0]) {
 247        return true;
 248    }
 249
 250    /* Inactivating means no more writes to the image can be done,
 251     * even if those writes would be changes invisible to the
 252     * guest.  For block job BBs that satisfy this, we can just allow
 253     * it.  This is the case for mirror job source, which is required
 254     * by libvirt non-shared block migration. */
 255    if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) {
 256        return true;
 257    }
 258
 259    return blk->force_allow_inactivate;
 260}
 261
 262static int blk_root_inactivate(BdrvChild *child)
 263{
 264    BlockBackend *blk = child->opaque;
 265
 266    if (blk->disable_perm) {
 267        return 0;
 268    }
 269
 270    if (!blk_can_inactivate(blk)) {
 271        return -EPERM;
 272    }
 273
 274    blk->disable_perm = true;
 275    if (blk->root) {
 276        bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
 277    }
 278
 279    return 0;
 280}
 281
 282static void blk_root_attach(BdrvChild *child)
 283{
 284    BlockBackend *blk = child->opaque;
 285    BlockBackendAioNotifier *notifier;
 286
 287    trace_blk_root_attach(child, blk, child->bs);
 288
 289    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
 290        bdrv_add_aio_context_notifier(child->bs,
 291                notifier->attached_aio_context,
 292                notifier->detach_aio_context,
 293                notifier->opaque);
 294    }
 295}
 296
 297static void blk_root_detach(BdrvChild *child)
 298{
 299    BlockBackend *blk = child->opaque;
 300    BlockBackendAioNotifier *notifier;
 301
 302    trace_blk_root_detach(child, blk, child->bs);
 303
 304    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
 305        bdrv_remove_aio_context_notifier(child->bs,
 306                notifier->attached_aio_context,
 307                notifier->detach_aio_context,
 308                notifier->opaque);
 309    }
 310}
 311
 312static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
 313{
 314    BlockBackend *blk = c->opaque;
 315    IO_CODE();
 316
 317    return blk_get_aio_context(blk);
 318}
 319
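    /*
     * Callbacks invoked on a BlockBackend in its role as parent of its root
     * BlockDriverState node.
     */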
 320static const BdrvChildClass child_root = {
 321    .inherit_options    = blk_root_inherit_options,
 322
 323    .change_media       = blk_root_change_media,
 324    .resize             = blk_root_resize,
 325    .get_name           = blk_root_get_name,
 326    .get_parent_desc    = blk_root_get_parent_desc,
 327
 328    .drained_begin      = blk_root_drained_begin,
 329    .drained_poll       = blk_root_drained_poll,
 330    .drained_end        = blk_root_drained_end,
 331
 332    .activate           = blk_root_activate,
 333    .inactivate         = blk_root_inactivate,
 334
 335    .attach             = blk_root_attach,
 336    .detach             = blk_root_detach,
 337
 338    .change_aio_ctx     = blk_root_change_aio_ctx,
 339
 340    .get_parent_aio_context = blk_root_get_parent_aio_context,
 341};
 342
 343/*
 344 * Create a new BlockBackend with a reference count of one.
 345 *
 346 * @perm is a bitmask of BLK_PERM_* constants which describes the permissions
 347 * to request for a block driver node that is attached to this BlockBackend.
 348 * @shared_perm is a bitmask which describes which permissions may be granted
 349 * to other users of the attached node.
 350 * Both sets of permissions can be changed later using blk_set_perm().
 351 *
 352 * Return the new BlockBackend on success, null on failure.
 353 */
 354BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
 355{
 356    BlockBackend *blk;
 357
 358    GLOBAL_STATE_CODE();
 359
 360    blk = g_new0(BlockBackend, 1);
 361    blk->refcnt = 1;
 362    blk->ctx = ctx;
 363    blk->perm = perm;
 364    blk->shared_perm = shared_perm;
 365    blk_set_enable_write_cache(blk, true);
 366
 367    blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
 368    blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
 369
 370    block_acct_init(&blk->stats);
 371
 372    qemu_mutex_init(&blk->queued_requests_lock);
 373    qemu_co_queue_init(&blk->queued_requests);
 374    notifier_list_init(&blk->remove_bs_notifiers);
 375    notifier_list_init(&blk->insert_bs_notifiers);
 376    QLIST_INIT(&blk->aio_notifiers);
 377
 378    QTAILQ_INSERT_TAIL(&block_backends, blk, link);
 379    return blk;
 380}
 381
 382/*
 383 * Create a new BlockBackend connected to an existing BlockDriverState.
 384 *
 385 * @perm is a bitmask of BLK_PERM_* constants which describes the
 386 * permissions to request for @bs that is attached to this
 387 * BlockBackend.  @shared_perm is a bitmask which describes which
 388 * permissions may be granted to other users of the attached node.
 389 * Both sets of permissions can be changed later using blk_set_perm().
 390 *
 391 * Return the new BlockBackend on success, null on failure.
 392 *
 393 * Callers must hold the AioContext lock of @bs.
 394 */
 395BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
 396                              uint64_t shared_perm, Error **errp)
 397{
 398    BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
 399
 400    GLOBAL_STATE_CODE();
 401
 402    if (blk_insert_bs(blk, bs, errp) < 0) {
 403        blk_unref(blk);
 404        return NULL;
 405    }
 406    return blk;
 407}
 408
 409/*
 410 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
 411 * By default, the new BlockBackend is in the main AioContext, but if the
 412 * parameters connect it with any existing node in a different AioContext, it
 413 * may end up there instead.
 414 *
 415 * Just as with bdrv_open(), after having called this function the reference to
 416 * @options belongs to the block layer (even on failure).
 417 *
 418 * Called without holding an AioContext lock.
 419 *
 420 * TODO: Remove @filename and @flags; it should be possible to specify a whole
 421 * BDS tree just by specifying the @options QDict (or @reference,
 422 * alternatively). At the time of adding this function, this is not possible,
 423 * though, so callers of this function have to be able to specify @filename and
 424 * @flags.
 425 */
 426BlockBackend *blk_new_open(const char *filename, const char *reference,
 427                           QDict *options, int flags, Error **errp)
 428{
 429    BlockBackend *blk;
 430    BlockDriverState *bs;
 431    AioContext *ctx;
 432    uint64_t perm = 0;
 433    uint64_t shared = BLK_PERM_ALL;
 434
 435    GLOBAL_STATE_CODE();
 436
 437    /*
 438     * blk_new_open() is mainly used in .bdrv_create implementations and the
 439     * tools where sharing isn't a major concern because the BDS stays private
 440     * and the file is generally not supposed to be used by a second process,
 441     * so we just request permission according to the flags.
 442     *
 443     * The exceptions are xen_disk and blockdev_init(); in these cases, the
 444     * caller of blk_new_open() doesn't make use of the permissions, but they
 445     * shouldn't hurt either. We can still share everything here because the
 446     * guest devices will add their own blockers if they can't share.
 447     */
 448    if ((flags & BDRV_O_NO_IO) == 0) {
 449        perm |= BLK_PERM_CONSISTENT_READ;
 450        if (flags & BDRV_O_RDWR) {
 451            perm |= BLK_PERM_WRITE;
 452        }
 453    }
 454    if (flags & BDRV_O_RESIZE) {
 455        perm |= BLK_PERM_RESIZE;
 456    }
 457    if (flags & BDRV_O_NO_SHARE) {
 458        shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
 459    }
 460
 461    aio_context_acquire(qemu_get_aio_context());
 462    bs = bdrv_open(filename, reference, options, flags, errp);
 463    aio_context_release(qemu_get_aio_context());
 464    if (!bs) {
 465        return NULL;
 466    }
 467
 468    /* bdrv_open() could have moved bs to a different AioContext */
 469    ctx = bdrv_get_aio_context(bs);
 470    blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
 471    blk->perm = perm;
 472    blk->shared_perm = shared;
 473
 474    aio_context_acquire(ctx);
 475    blk_insert_bs(blk, bs, errp);
 476    bdrv_unref(bs);
 477    aio_context_release(ctx);
 478
 479    if (!blk->root) {
 480        blk_unref(blk);
 481        return NULL;
 482    }
 483
 484    return blk;
 485}
 486
 487static void blk_delete(BlockBackend *blk)
 488{
 489    assert(!blk->refcnt);
 490    assert(!blk->name);
 491    assert(!blk->dev);
 492    if (blk->public.throttle_group_member.throttle_state) {
 493        blk_io_limits_disable(blk);
 494    }
 495    if (blk->root) {
 496        blk_remove_bs(blk);
 497    }
 498    if (blk->vmsh) {
 499        qemu_del_vm_change_state_handler(blk->vmsh);
 500        blk->vmsh = NULL;
 501    }
 502    assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
 503    assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
 504    assert(QLIST_EMPTY(&blk->aio_notifiers));
 505    assert(qemu_co_queue_empty(&blk->queued_requests));
 506    qemu_mutex_destroy(&blk->queued_requests_lock);
 507    QTAILQ_REMOVE(&block_backends, blk, link);
 508    drive_info_del(blk->legacy_dinfo);
 509    block_acct_cleanup(&blk->stats);
 510    g_free(blk);
 511}
 512
 513static void drive_info_del(DriveInfo *dinfo)
 514{
 515    if (!dinfo) {
 516        return;
 517    }
 518    qemu_opts_del(dinfo->opts);
 519    g_free(dinfo);
 520}
 521
 522int blk_get_refcnt(BlockBackend *blk)
 523{
 524    GLOBAL_STATE_CODE();
 525    return blk ? blk->refcnt : 0;
 526}
 527
 528/*
 529 * Increment @blk's reference count.
 530 * @blk must not be null.
 531 */
 532void blk_ref(BlockBackend *blk)
 533{
 534    assert(blk->refcnt > 0);
 535    GLOBAL_STATE_CODE();
 536    blk->refcnt++;
 537}
 538
 539/*
 540 * Decrement @blk's reference count.
 541 * If this drops it to zero, destroy @blk.
 542 * For convenience, do nothing if @blk is null.
 543 */
 544void blk_unref(BlockBackend *blk)
 545{
 546    GLOBAL_STATE_CODE();
 547    if (blk) {
 548        assert(blk->refcnt > 0);
 549        if (blk->refcnt > 1) {
 550            blk->refcnt--;
 551        } else {
 552            blk_drain(blk);
 553            /* blk_drain() cannot resurrect blk, nobody held a reference */
 554            assert(blk->refcnt == 1);
 555            blk->refcnt = 0;
 556            blk_delete(blk);
 557        }
 558    }
 559}
 560
 561/*
 562 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
 563 * ones which are hidden (i.e. are not referenced by the monitor).
 564 */
 565BlockBackend *blk_all_next(BlockBackend *blk)
 566{
 567    GLOBAL_STATE_CODE();
 568    return blk ? QTAILQ_NEXT(blk, link)
 569               : QTAILQ_FIRST(&block_backends);
 570}
 571
 572void blk_remove_all_bs(void)
 573{
 574    BlockBackend *blk = NULL;
 575
 576    GLOBAL_STATE_CODE();
 577
 578    while ((blk = blk_all_next(blk)) != NULL) {
 579        AioContext *ctx = blk_get_aio_context(blk);
 580
 581        aio_context_acquire(ctx);
 582        if (blk->root) {
 583            blk_remove_bs(blk);
 584        }
 585        aio_context_release(ctx);
 586    }
 587}
 588
 589/*
 590 * Return the monitor-owned BlockBackend after @blk.
 591 * If @blk is null, return the first one.
 592 * Else, return @blk's next sibling, which may be null.
 593 *
 594 * To iterate over all BlockBackends, do
 595 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 596 *     ...
 597 * }
 598 */
 599BlockBackend *blk_next(BlockBackend *blk)
 600{
 601    GLOBAL_STATE_CODE();
 602    return blk ? QTAILQ_NEXT(blk, monitor_link)
 603               : QTAILQ_FIRST(&monitor_block_backends);
 604}
 605
 606/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
 607 * the monitor or attached to a BlockBackend */
 608BlockDriverState *bdrv_next(BdrvNextIterator *it)
 609{
 610    BlockDriverState *bs, *old_bs;
 611
 612    /* Must be called from the main loop */
 613    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 614
 615    /* First, return all root nodes of BlockBackends. In order to avoid
 616     * returning a BDS twice when multiple BBs refer to it, we only return it
 617     * if the BB is the first one in the parent list of the BDS. */
 618    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
 619        BlockBackend *old_blk = it->blk;
 620
 621        old_bs = old_blk ? blk_bs(old_blk) : NULL;
 622
 623        do {
 624            it->blk = blk_all_next(it->blk);
 625            bs = it->blk ? blk_bs(it->blk) : NULL;
 626        } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
 627
 628        if (it->blk) {
 629            blk_ref(it->blk);
 630        }
 631        blk_unref(old_blk);
 632
 633        if (bs) {
 634            bdrv_ref(bs);
 635            bdrv_unref(old_bs);
 636            return bs;
 637        }
 638        it->phase = BDRV_NEXT_MONITOR_OWNED;
 639    } else {
 640        old_bs = it->bs;
 641    }
 642
 643    /* Then return the monitor-owned BDSes without a BB attached. Ignore all
 644     * BDSes that are attached to a BlockBackend here; they have been handled
 645     * by the above block already */
 646    do {
 647        it->bs = bdrv_next_monitor_owned(it->bs);
 648        bs = it->bs;
 649    } while (bs && bdrv_has_blk(bs));
 650
 651    if (bs) {
 652        bdrv_ref(bs);
 653    }
 654    bdrv_unref(old_bs);
 655
 656    return bs;
 657}
 658
 659static void bdrv_next_reset(BdrvNextIterator *it)
 660{
 661    *it = (BdrvNextIterator) {
 662        .phase = BDRV_NEXT_BACKEND_ROOTS,
 663    };
 664}
 665
 666BlockDriverState *bdrv_first(BdrvNextIterator *it)
 667{
 668    GLOBAL_STATE_CODE();
 669    bdrv_next_reset(it);
 670    return bdrv_next(it);
 671}
 672
 673/* Must be called when aborting a bdrv_next() iteration before
 674 * bdrv_next() returns NULL */
 675void bdrv_next_cleanup(BdrvNextIterator *it)
 676{
 677    /* Must be called from the main loop */
 678    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 679
 680    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
 681        if (it->blk) {
 682            bdrv_unref(blk_bs(it->blk));
 683            blk_unref(it->blk);
 684        }
 685    } else {
 686        bdrv_unref(it->bs);
 687    }
 688
 689    bdrv_next_reset(it);
 690}
 691
 692/*
 693 * Add a BlockBackend into the list of backends referenced by the monitor, with
 694 * the given @name acting as the handle for the monitor.
 695 * Strictly for use by blockdev.c.
 696 *
 697 * @name must not be null or empty.
 698 *
 699 * Returns true on success and false on failure. In the latter case, an Error
 700 * object is returned through @errp.
 701 */
 702bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
 703{
 704    assert(!blk->name);
 705    assert(name && name[0]);
 706    GLOBAL_STATE_CODE();
 707
 708    if (!id_wellformed(name)) {
 709        error_setg(errp, "Invalid device name");
 710        return false;
 711    }
 712    if (blk_by_name(name)) {
 713        error_setg(errp, "Device with id '%s' already exists", name);
 714        return false;
 715    }
 716    if (bdrv_find_node(name)) {
 717        error_setg(errp,
 718                   "Device name '%s' conflicts with an existing node name",
 719                   name);
 720        return false;
 721    }
 722
 723    blk->name = g_strdup(name);
 724    QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
 725    return true;
 726}
 727
 728/*
 729 * Remove a BlockBackend from the list of backends referenced by the monitor.
 730 * Strictly for use by blockdev.c.
 731 */
 732void monitor_remove_blk(BlockBackend *blk)
 733{
 734    GLOBAL_STATE_CODE();
 735
 736    if (!blk->name) {
 737        return;
 738    }
 739
 740    QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
 741    g_free(blk->name);
 742    blk->name = NULL;
 743}
 744
 745/*
 746 * Return @blk's name, a non-null string.
 747 * Returns an empty string iff @blk is not referenced by the monitor.
 748 */
 749const char *blk_name(const BlockBackend *blk)
 750{
 751    IO_CODE();
 752    return blk->name ?: "";
 753}
 754
 755/*
 756 * Return the BlockBackend with name @name if it exists, else null.
 757 * @name must not be null.
 758 */
 759BlockBackend *blk_by_name(const char *name)
 760{
 761    BlockBackend *blk = NULL;
 762
 763    GLOBAL_STATE_CODE();
 764    assert(name);
 765    while ((blk = blk_next(blk)) != NULL) {
 766        if (!strcmp(name, blk->name)) {
 767            return blk;
 768        }
 769    }
 770    return NULL;
 771}
 772
 773/*
 774 * Return the BlockDriverState attached to @blk if any, else null.
 775 */
 776BlockDriverState *blk_bs(BlockBackend *blk)
 777{
 778    IO_CODE();
 779    return blk->root ? blk->root->bs : NULL;
 780}
 781
 782static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
 783{
 784    BdrvChild *child;
 785
 786    GLOBAL_STATE_CODE();
 787
 788    QLIST_FOREACH(child, &bs->parents, next_parent) {
 789        if (child->klass == &child_root) {
 790            return child->opaque;
 791        }
 792    }
 793
 794    return NULL;
 795}
 796
 797/*
 798 * Returns true if @bs has an associated BlockBackend.
 799 */
 800bool bdrv_has_blk(BlockDriverState *bs)
 801{
 802    GLOBAL_STATE_CODE();
 803    return bdrv_first_blk(bs) != NULL;
 804}
 805
 806/*
 807 * Returns true if @bs has only BlockBackends as parents.
 808 */
 809bool bdrv_is_root_node(BlockDriverState *bs)
 810{
 811    BdrvChild *c;
 812
 813    GLOBAL_STATE_CODE();
 814    QLIST_FOREACH(c, &bs->parents, next_parent) {
 815        if (c->klass != &child_root) {
 816            return false;
 817        }
 818    }
 819
 820    return true;
 821}
 822
 823/*
 824 * Return @blk's DriveInfo if any, else null.
 825 */
 826DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
 827{
 828    GLOBAL_STATE_CODE();
 829    return blk->legacy_dinfo;
 830}
 831
 832/*
 833 * Set @blk's DriveInfo to @dinfo, and return it.
 834 * @blk must not have a DriveInfo set already.
 835 * No other BlockBackend may have the same DriveInfo set.
 836 */
 837DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
 838{
 839    assert(!blk->legacy_dinfo);
 840    GLOBAL_STATE_CODE();
 841    return blk->legacy_dinfo = dinfo;
 842}
 843
 844/*
 845 * Return the BlockBackend with DriveInfo @dinfo.
 846 * It must exist.
 847 */
 848BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
 849{
 850    BlockBackend *blk = NULL;
 851    GLOBAL_STATE_CODE();
 852
 853    while ((blk = blk_next(blk)) != NULL) {
 854        if (blk->legacy_dinfo == dinfo) {
 855            return blk;
 856        }
 857    }
 858    abort();
 859}
 860
 861/*
 862 * Returns a pointer to the publicly accessible fields of @blk.
 863 */
 864BlockBackendPublic *blk_get_public(BlockBackend *blk)
 865{
 866    GLOBAL_STATE_CODE();
 867    return &blk->public;
 868}
 869
 870/*
 871 * Returns a BlockBackend given the associated @public fields.
 872 */
 873BlockBackend *blk_by_public(BlockBackendPublic *public)
 874{
 875    GLOBAL_STATE_CODE();
 876    return container_of(public, BlockBackend, public);
 877}
 878
 879/*
 880 * Disassociates the currently associated BlockDriverState from @blk.
 881 */
 882void blk_remove_bs(BlockBackend *blk)
 883{
 884    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
 885    BdrvChild *root;
 886
 887    GLOBAL_STATE_CODE();
 888
 889    notifier_list_notify(&blk->remove_bs_notifiers, blk);
 890    if (tgm->throttle_state) {
 891        BlockDriverState *bs = blk_bs(blk);
 892
 893        /*
 894         * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for
 895         * example, if a temporary filter node is removed by a blockjob.
 896         */
 897        bdrv_ref(bs);
 898        bdrv_drained_begin(bs);
 899        throttle_group_detach_aio_context(tgm);
 900        throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
 901        bdrv_drained_end(bs);
 902        bdrv_unref(bs);
 903    }
 904
 905    blk_update_root_state(blk);
 906
 907    /* bdrv_root_unref_child() will cause blk->root to become stale and may
 908     * switch to a completion coroutine later on. Let's drain all I/O here
 909     * to avoid that and a potential QEMU crash.
 910     */
 911    blk_drain(blk);
 912    root = blk->root;
 913    blk->root = NULL;
 914    bdrv_root_unref_child(root);
 915}
 916
 917/*
 918 * Associates a new BlockDriverState with @blk.
 919 *
 920 * Callers must hold the AioContext lock of @bs.
 921 */
 922int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
 923{
 924    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
 925    GLOBAL_STATE_CODE();
 926    bdrv_ref(bs);
 927    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
 928                                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
 929                                       blk->perm, blk->shared_perm,
 930                                       blk, errp);
 931    if (blk->root == NULL) {
 932        return -EPERM;
 933    }
 934
 935    notifier_list_notify(&blk->insert_bs_notifiers, blk);
 936    if (tgm->throttle_state) {
 937        throttle_group_detach_aio_context(tgm);
 938        throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs));
 939    }
 940
 941    return 0;
 942}
 943
 944/*
 945 * Change BlockDriverState associated with @blk.
 946 */
 947int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
 948{
 949    GLOBAL_STATE_CODE();
 950    return bdrv_replace_child_bs(blk->root, new_bs, errp);
 951}
 952
 953/*
 954 * Sets the permission bitmasks that the user of the BlockBackend needs.
 955 */
 956int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
 957                 Error **errp)
 958{
 959    int ret;
 960    GLOBAL_STATE_CODE();
 961
 962    if (blk->root && !blk->disable_perm) {
 963        ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
 964        if (ret < 0) {
 965            return ret;
 966        }
 967    }
 968
 969    blk->perm = perm;
 970    blk->shared_perm = shared_perm;
 971
 972    return 0;
 973}
 974
 975void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
 976{
 977    GLOBAL_STATE_CODE();
 978    *perm = blk->perm;
 979    *shared_perm = blk->shared_perm;
 980}
 981
 982/*
 983 * Attach device model @dev to @blk.
 984 * Return 0 on success, -EBUSY when a device model is attached already.
 985 */
 986int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
 987{
 988    GLOBAL_STATE_CODE();
 989    if (blk->dev) {
 990        return -EBUSY;
 991    }
 992
 993    /* While migration is still incoming, we don't need to apply the
 994     * permissions of guest device BlockBackends. We might still have a block
 995     * job or NBD server writing to the image for storage migration. */
 996    if (runstate_check(RUN_STATE_INMIGRATE)) {
 997        blk->disable_perm = true;
 998    }
 999
1000    blk_ref(blk);
1001    blk->dev = dev;
1002    blk_iostatus_reset(blk);
1003
1004    return 0;
1005}
1006
1007/*
1008 * Detach device model @dev from @blk.
1009 * @dev must be currently attached to @blk.
1010 */
1011void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
1012{
1013    assert(blk->dev == dev);
1014    GLOBAL_STATE_CODE();
1015    blk->dev = NULL;
1016    blk->dev_ops = NULL;
1017    blk->dev_opaque = NULL;
1018    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
1019    blk_unref(blk);
1020}
1021
1022/*
1023 * Return the device model attached to @blk if any, else null.
1024 */
1025DeviceState *blk_get_attached_dev(BlockBackend *blk)
1026{
1027    GLOBAL_STATE_CODE();
1028    return blk->dev;
1029}
1030
1031/* Return the qdev ID, or if no ID is assigned the QOM path, of the block
1032 * device attached to the BlockBackend. */
1033char *blk_get_attached_dev_id(BlockBackend *blk)
1034{
1035    DeviceState *dev = blk->dev;
1036    IO_CODE();
1037
1038    if (!dev) {
1039        return g_strdup("");
1040    } else if (dev->id) {
1041        return g_strdup(dev->id);
1042    }
1043
1044    return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
1045}
1046
1047/*
1048 * Return the BlockBackend which has the device model @dev attached if it
1049 * exists, else null.
1050 *
1051 * @dev must not be null.
1052 */
1053BlockBackend *blk_by_dev(void *dev)
1054{
1055    BlockBackend *blk = NULL;
1056
1057    GLOBAL_STATE_CODE();
1058
1059    assert(dev != NULL);
1060    while ((blk = blk_all_next(blk)) != NULL) {
1061        if (blk->dev == dev) {
1062            return blk;
1063        }
1064    }
1065    return NULL;
1066}
1067
1068/*
1069 * Set @blk's device model callbacks to @ops.
1070 * @opaque is the opaque argument to pass to the callbacks.
1071 * This is for use by device models.
1072 */
1073void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
1074                     void *opaque)
1075{
1076    GLOBAL_STATE_CODE();
1077    blk->dev_ops = ops;
1078    blk->dev_opaque = opaque;
1079
1080    /* Are we currently quiesced? Should we enforce this right now? */
1081    if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) {
1082        ops->drained_begin(opaque);
1083    }
1084}
1085
1086/*
1087 * Notify @blk's attached device model of media change.
1088 *
1089 * If @load is true, notify of media load. This action can fail, meaning that
1090 * the medium cannot be loaded. @errp is set then.
1091 *
1092 * If @load is false, notify of media eject. This can never fail.
1093 *
1094 * Also send DEVICE_TRAY_MOVED events as appropriate.
1095 */
1096void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
1097{
1098    GLOBAL_STATE_CODE();
1099    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
1100        bool tray_was_open, tray_is_open;
1101        Error *local_err = NULL;
1102
1103        tray_was_open = blk_dev_is_tray_open(blk);
1104        blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
1105        if (local_err) {
1106            assert(load == true);
1107            error_propagate(errp, local_err);
1108            return;
1109        }
1110        tray_is_open = blk_dev_is_tray_open(blk);
1111
1112        if (tray_was_open != tray_is_open) {
1113            char *id = blk_get_attached_dev_id(blk);
1114            qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open);
1115            g_free(id);
1116        }
1117    }
1118}
1119
1120static void blk_root_change_media(BdrvChild *child, bool load)
1121{
1122    blk_dev_change_media_cb(child->opaque, load, NULL);
1123}
1124
1125/*
1126 * Does @blk's attached device model have removable media?
1127 * %true if no device model is attached.
1128 */
1129bool blk_dev_has_removable_media(BlockBackend *blk)
1130{
1131    GLOBAL_STATE_CODE();
1132    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
1133}
1134
1135/*
1136 * Does @blk's attached device model have a tray?
1137 */
1138bool blk_dev_has_tray(BlockBackend *blk)
1139{
1140    IO_CODE();
1141    return blk->dev_ops && blk->dev_ops->is_tray_open;
1142}
1143
1144/*
1145 * Notify @blk's attached device model of a media eject request.
1146 * If @force is true, the medium is about to be yanked out forcefully.
1147 */
1148void blk_dev_eject_request(BlockBackend *blk, bool force)
1149{
1150    GLOBAL_STATE_CODE();
1151    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
1152        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
1153    }
1154}
1155
1156/*
1157 * Does @blk's attached device model have a tray, and is it open?
1158 */
1159bool blk_dev_is_tray_open(BlockBackend *blk)
1160{
1161    IO_CODE();
1162    if (blk_dev_has_tray(blk)) {
1163        return blk->dev_ops->is_tray_open(blk->dev_opaque);
1164    }
1165    return false;
1166}
1167
1168/*
1169 * Does @blk's attached device model have the medium locked?
1170 * %false if the device model has no such lock.
1171 */
1172bool blk_dev_is_medium_locked(BlockBackend *blk)
1173{
1174    GLOBAL_STATE_CODE();
1175    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
1176        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
1177    }
1178    return false;
1179}
1180
1181/*
1182 * Notify @blk's attached device model of a backend size change.
1183 */
1184static void blk_root_resize(BdrvChild *child)
1185{
1186    BlockBackend *blk = child->opaque;
1187
1188    if (blk->dev_ops && blk->dev_ops->resize_cb) {
1189        blk->dev_ops->resize_cb(blk->dev_opaque);
1190    }
1191}
1192
1193void blk_iostatus_enable(BlockBackend *blk)
1194{
1195    GLOBAL_STATE_CODE();
1196    blk->iostatus_enabled = true;
1197    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1198}
1199
1200/* The I/O status is only enabled if the drive explicitly
1201 * enables it _and_ the VM is configured to stop on errors */
1202bool blk_iostatus_is_enabled(const BlockBackend *blk)
1203{
1204    IO_CODE();
1205    return (blk->iostatus_enabled &&
1206           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
1207            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
1208            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
1209}
1210
1211BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
1212{
1213    GLOBAL_STATE_CODE();
1214    return blk->iostatus;
1215}
1216
1217void blk_iostatus_disable(BlockBackend *blk)
1218{
1219    GLOBAL_STATE_CODE();
1220    blk->iostatus_enabled = false;
1221}
1222
1223void blk_iostatus_reset(BlockBackend *blk)
1224{
1225    GLOBAL_STATE_CODE();
1226    if (blk_iostatus_is_enabled(blk)) {
1227        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1228    }
1229}
1230
1231void blk_iostatus_set_err(BlockBackend *blk, int error)
1232{
1233    IO_CODE();
1234    assert(blk_iostatus_is_enabled(blk));
1235    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
1236        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
1237                                          BLOCK_DEVICE_IO_STATUS_FAILED;
1238    }
1239}
1240
1241void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
1242{
1243    IO_CODE();
1244    blk->allow_write_beyond_eof = allow;
1245}
1246
1247void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
1248{
1249    IO_CODE();
1250    blk->allow_aio_context_change = allow;
1251}
1252
1253void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
1254{
1255    IO_CODE();
1256    qatomic_set(&blk->disable_request_queuing, disable);
1257}
1258
1259static int coroutine_fn GRAPH_RDLOCK
1260blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
1261{
1262    int64_t len;
1263
1264    if (bytes < 0) {
1265        return -EIO;
1266    }
1267
1268    if (!blk_co_is_available(blk)) {
1269        return -ENOMEDIUM;
1270    }
1271
1272    if (offset < 0) {
1273        return -EIO;
1274    }
1275
1276    if (!blk->allow_write_beyond_eof) {
1277        len = bdrv_co_getlength(blk_bs(blk));
1278        if (len < 0) {
1279            return len;
1280        }
1281
1282        if (offset > len || len - offset < bytes) {
1283            return -EIO;
1284        }
1285    }
1286
1287    return 0;
1288}
1289
1290/* Are we currently in a drained section? */
1291bool blk_in_drain(BlockBackend *blk)
1292{
1293    GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */
1294    return qatomic_read(&blk->quiesce_counter);
1295}
1296
1297/* To be called between exactly one pair of blk_inc/dec_in_flight() */
1298static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
1299{
1300    assert(blk->in_flight > 0);
1301
1302    if (qatomic_read(&blk->quiesce_counter) &&
1303        !qatomic_read(&blk->disable_request_queuing)) {
1304        /*
1305         * Take lock before decrementing in flight counter so main loop thread
1306         * waits for us to enqueue ourselves before it can leave the drained
1307         * section.
1308         */
1309        qemu_mutex_lock(&blk->queued_requests_lock);
1310        blk_dec_in_flight(blk);
1311        qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
1312        blk_inc_in_flight(blk);
1313        qemu_mutex_unlock(&blk->queued_requests_lock);
1314    }
1315}
1316
1317/* To be called between exactly one pair of blk_inc/dec_in_flight() */
1318static int coroutine_fn
1319blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
1320                      QEMUIOVector *qiov, size_t qiov_offset,
1321                      BdrvRequestFlags flags)
1322{
1323    int ret;
1324    BlockDriverState *bs;
1325    IO_CODE();
1326
1327    blk_wait_while_drained(blk);
1328    GRAPH_RDLOCK_GUARD();
1329
1330    /* Call blk_bs() only after waiting, the graph may have changed */
1331    bs = blk_bs(blk);
1332    trace_blk_co_preadv(blk, bs, offset, bytes, flags);
1333
1334    ret = blk_check_byte_request(blk, offset, bytes);
1335    if (ret < 0) {
1336        return ret;
1337    }
1338
1339    bdrv_inc_in_flight(bs);
1340
1341    /* throttling disk I/O */
1342    if (blk->public.throttle_group_member.throttle_state) {
1343        throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1344                bytes, false);
1345    }
1346
1347    ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset,
1348                              flags);
1349    bdrv_dec_in_flight(bs);
1350    return ret;
1351}
1352
1353int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes,
1354                              void *buf, BdrvRequestFlags flags)
1355{
1356    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1357    IO_OR_GS_CODE();
1358
1359    assert(bytes <= SIZE_MAX);
1360
1361    return blk_co_preadv(blk, offset, bytes, &qiov, flags);
1362}
1363
1364int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
1365                               int64_t bytes, QEMUIOVector *qiov,
1366                               BdrvRequestFlags flags)
1367{
1368    int ret;
1369    IO_OR_GS_CODE();
1370
1371    blk_inc_in_flight(blk);
1372    ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, 0, flags);
1373    blk_dec_in_flight(blk);
1374
1375    return ret;
1376}
1377
1378int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset,
1379                                    int64_t bytes, QEMUIOVector *qiov,
1380                                    size_t qiov_offset, BdrvRequestFlags flags)
1381{
1382    int ret;
1383    IO_OR_GS_CODE();
1384
1385    blk_inc_in_flight(blk);
1386    ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, qiov_offset, flags);
1387    blk_dec_in_flight(blk);
1388
1389    return ret;
1390}
1391
1392/* To be called between exactly one pair of blk_inc/dec_in_flight() */
1393static int coroutine_fn
1394blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
1395                       QEMUIOVector *qiov, size_t qiov_offset,
1396                       BdrvRequestFlags flags)
1397{
1398    int ret;
1399    BlockDriverState *bs;
1400    IO_CODE();
1401
1402    blk_wait_while_drained(blk);
1403    GRAPH_RDLOCK_GUARD();
1404
1405    /* Call blk_bs() only after waiting, the graph may have changed */
1406    bs = blk_bs(blk);
1407    trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
1408
1409    ret = blk_check_byte_request(blk, offset, bytes);
1410    if (ret < 0) {
1411        return ret;
1412    }
1413
1414    bdrv_inc_in_flight(bs);
1415    /* throttling disk I/O */
1416    if (blk->public.throttle_group_member.throttle_state) {
1417        throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1418                bytes, true);
1419    }
1420
1421    if (!blk->enable_write_cache) {
1422        flags |= BDRV_REQ_FUA;
1423    }
1424
1425    ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset,
1426                               flags);
1427    bdrv_dec_in_flight(bs);
1428    return ret;
1429}
1430
1431int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
1432                                     int64_t bytes,
1433                                     QEMUIOVector *qiov, size_t qiov_offset,
1434                                     BdrvRequestFlags flags)
1435{
1436    int ret;
1437    IO_OR_GS_CODE();
1438
1439    blk_inc_in_flight(blk);
1440    ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
1441    blk_dec_in_flight(blk);
1442
1443    return ret;
1444}
1445
1446int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes,
1447                               const void *buf, BdrvRequestFlags flags)
1448{
1449    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1450    IO_OR_GS_CODE();
1451
1452    assert(bytes <= SIZE_MAX);
1453
1454    return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
1455}
1456
1457int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
1458                                int64_t bytes, QEMUIOVector *qiov,
1459                                BdrvRequestFlags flags)
1460{
1461    IO_OR_GS_CODE();
1462    return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
1463}
1464
1465int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
1466                                           BlockDriverState *base,
1467                                           int64_t offset, int64_t bytes,
1468                                           int64_t *pnum, int64_t *map,
1469                                           BlockDriverState **file)
1470{
1471    IO_CODE();
1472    GRAPH_RDLOCK_GUARD();
1473    return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum,
1474                                      map, file);
1475}
1476
1477int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
1478                                           BlockDriverState *base,
1479                                           bool include_base, int64_t offset,
1480                                           int64_t bytes, int64_t *pnum)
1481{
1482    IO_CODE();
1483    GRAPH_RDLOCK_GUARD();
1484    return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset,
1485                                      bytes, pnum);
1486}
1487
1488typedef struct BlkRwCo {
1489    BlockBackend *blk;
1490    int64_t offset;
1491    void *iobuf;
1492    int ret;
1493    BdrvRequestFlags flags;
1494} BlkRwCo;
1495
1496int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
1497{
1498    GLOBAL_STATE_CODE();
1499    return bdrv_make_zero(blk->root, flags);
1500}
1501
1502void blk_inc_in_flight(BlockBackend *blk)
1503{
1504    IO_CODE();
1505    qatomic_inc(&blk->in_flight);
1506}
1507
1508void blk_dec_in_flight(BlockBackend *blk)
1509{
1510    IO_CODE();
1511    qatomic_dec(&blk->in_flight);
1512    aio_wait_kick();
1513}
1514
1515static void error_callback_bh(void *opaque)
1516{
1517    struct BlockBackendAIOCB *acb = opaque;
1518
1519    blk_dec_in_flight(acb->blk);
1520    acb->common.cb(acb->common.opaque, acb->ret);
1521    qemu_aio_unref(acb);
1522}
1523
1524BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
1525                                  BlockCompletionFunc *cb,
1526                                  void *opaque, int ret)
1527{
1528    struct BlockBackendAIOCB *acb;
1529    IO_CODE();
1530
1531    blk_inc_in_flight(blk);
1532    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
1533    acb->blk = blk;
1534    acb->ret = ret;
1535
1536    replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
1537                                     error_callback_bh, acb);
1538    return &acb->common;
1539}
1540
1541typedef struct BlkAioEmAIOCB {
1542    BlockAIOCB common;
1543    BlkRwCo rwco;
1544    int64_t bytes;
1545    bool has_returned;
1546} BlkAioEmAIOCB;
1547
1548static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_)
1549{
1550    BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common);
1551
1552    return blk_get_aio_context(acb->rwco.blk);
1553}
1554
1555static const AIOCBInfo blk_aio_em_aiocb_info = {
1556    .aiocb_size         = sizeof(BlkAioEmAIOCB),
1557    .get_aio_context    = blk_aio_em_aiocb_get_aio_context,
1558};
1559
1560static void blk_aio_complete(BlkAioEmAIOCB *acb)
1561{
1562    if (acb->has_returned) {
1563        acb->common.cb(acb->common.opaque, acb->rwco.ret);
1564        blk_dec_in_flight(acb->rwco.blk);
1565        qemu_aio_unref(acb);
1566    }
1567}
1568
1569static void blk_aio_complete_bh(void *opaque)
1570{
1571    BlkAioEmAIOCB *acb = opaque;
1572    assert(acb->has_returned);
1573    blk_aio_complete(acb);
1574}
1575
1576static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
1577                                int64_t bytes,
1578                                void *iobuf, CoroutineEntry co_entry,
1579                                BdrvRequestFlags flags,
1580                                BlockCompletionFunc *cb, void *opaque)
1581{
1582    BlkAioEmAIOCB *acb;
1583    Coroutine *co;
1584
1585    blk_inc_in_flight(blk);
1586    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1587    acb->rwco = (BlkRwCo) {
1588        .blk    = blk,
1589        .offset = offset,
1590        .iobuf  = iobuf,
1591        .flags  = flags,
1592        .ret    = NOT_DONE,
1593    };
1594    acb->bytes = bytes;
1595    acb->has_returned = false;
1596
1597    co = qemu_coroutine_create(co_entry, acb);
1598    aio_co_enter(blk_get_aio_context(blk), co);
1599
1600    acb->has_returned = true;
1601    if (acb->rwco.ret != NOT_DONE) {
1602        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
1603                                         blk_aio_complete_bh, acb);
1604    }
1605
1606    return &acb->common;
1607}
1608
1609static void coroutine_fn blk_aio_read_entry(void *opaque)
1610{
1611    BlkAioEmAIOCB *acb = opaque;
1612    BlkRwCo *rwco = &acb->rwco;
1613    QEMUIOVector *qiov = rwco->iobuf;
1614
1615    assert(qiov->size == acb->bytes);
1616    rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov,
1617                                      0, rwco->flags);
1618    blk_aio_complete(acb);
1619}
1620
1621static void coroutine_fn blk_aio_write_entry(void *opaque)
1622{
1623    BlkAioEmAIOCB *acb = opaque;
1624    BlkRwCo *rwco = &acb->rwco;
1625    QEMUIOVector *qiov = rwco->iobuf;
1626
1627    assert(!qiov || qiov->size == acb->bytes);
1628    rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
1629                                       qiov, 0, rwco->flags);
1630    blk_aio_complete(acb);
1631}
1632
1633BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1634                                  int64_t bytes, BdrvRequestFlags flags,
1635                                  BlockCompletionFunc *cb, void *opaque)
1636{
1637    IO_CODE();
1638    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry,
1639                        flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
1640}
1641
1642int64_t coroutine_fn blk_co_getlength(BlockBackend *blk)
1643{
1644    IO_CODE();
1645    GRAPH_RDLOCK_GUARD();
1646
1647    if (!blk_co_is_available(blk)) {
1648        return -ENOMEDIUM;
1649    }
1650
1651    return bdrv_co_getlength(blk_bs(blk));
1652}
1653
1654int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
1655{
1656    BlockDriverState *bs = blk_bs(blk);
1657
1658    IO_CODE();
1659    GRAPH_RDLOCK_GUARD();
1660
1661    if (!bs) {
1662        return -ENOMEDIUM;
1663    } else {
1664        return bdrv_co_nb_sectors(bs);
1665    }
1666}
1667
1668/*
1669 * This wrapper is written by hand because this function is in the hot I/O path,
1670 * via blk_get_geometry.
1671 */
1672int64_t coroutine_mixed_fn blk_nb_sectors(BlockBackend *blk)
1673{
1674    BlockDriverState *bs = blk_bs(blk);
1675
1676    IO_CODE();
1677
1678    if (!bs) {
1679        return -ENOMEDIUM;
1680    } else {
1681        return bdrv_nb_sectors(bs);
1682    }
1683}
1684
1685/* return 0 as number of sectors if no device present or error */
1686void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
1687                                      uint64_t *nb_sectors_ptr)
1688{
1689    int64_t ret = blk_co_nb_sectors(blk);
1690    *nb_sectors_ptr = ret < 0 ? 0 : ret;
1691}
1692
1693/*
1694 * This wrapper is written by hand because this function is in the hot I/O path.
1695 */
1696void coroutine_mixed_fn blk_get_geometry(BlockBackend *blk,
1697                                         uint64_t *nb_sectors_ptr)
1698{
1699    int64_t ret = blk_nb_sectors(blk);
1700    *nb_sectors_ptr = ret < 0 ? 0 : ret;
1701}
1702
1703BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
1704                           QEMUIOVector *qiov, BdrvRequestFlags flags,
1705                           BlockCompletionFunc *cb, void *opaque)
1706{
1707    IO_CODE();
1708    assert((uint64_t)qiov->size <= INT64_MAX);
1709    return blk_aio_prwv(blk, offset, qiov->size, qiov,
1710                        blk_aio_read_entry, flags, cb, opaque);
1711}
1712
1713BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
1714                            QEMUIOVector *qiov, BdrvRequestFlags flags,
1715                            BlockCompletionFunc *cb, void *opaque)
1716{
1717    IO_CODE();
1718    assert((uint64_t)qiov->size <= INT64_MAX);
1719    return blk_aio_prwv(blk, offset, qiov->size, qiov,
1720                        blk_aio_write_entry, flags, cb, opaque);
1721}
1722
1723void blk_aio_cancel(BlockAIOCB *acb)
1724{
1725    GLOBAL_STATE_CODE();
1726    bdrv_aio_cancel(acb);
1727}
1728
1729void blk_aio_cancel_async(BlockAIOCB *acb)
1730{
1731    IO_CODE();
1732    bdrv_aio_cancel_async(acb);
1733}
1734
1735/* To be called between exactly one pair of blk_inc/dec_in_flight() */
1736static int coroutine_fn
1737blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1738{
1739    IO_CODE();
1740
1741    blk_wait_while_drained(blk);
1742    GRAPH_RDLOCK_GUARD();
1743
1744    if (!blk_co_is_available(blk)) {
1745        return -ENOMEDIUM;
1746    }
1747
1748    return bdrv_co_ioctl(blk_bs(blk), req, buf);
1749}
1750
1751int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req,
1752                              void *buf)
1753{
1754    int ret;
1755    IO_OR_GS_CODE();
1756
1757    blk_inc_in_flight(blk);
1758    ret = blk_co_do_ioctl(blk, req, buf);
1759    blk_dec_in_flight(blk);
1760
1761    return ret;
1762}
1763
1764static void coroutine_fn blk_aio_ioctl_entry(void *opaque)
1765{
1766    BlkAioEmAIOCB *acb = opaque;
1767    BlkRwCo *rwco = &acb->rwco;
1768
1769    rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
1770
1771    blk_aio_complete(acb);
1772}
1773
1774BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
1775                          BlockCompletionFunc *cb, void *opaque)
1776{
1777    IO_CODE();
1778    return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
1779}
1780
1781/* To be called between exactly one pair of blk_inc/dec_in_flight() */
1782static int coroutine_fn
1783blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
1784{
1785    int ret;
1786    IO_CODE();
1787
1788    blk_wait_while_drained(blk);
1789    GRAPH_RDLOCK_GUARD();
1790
1791    ret = blk_check_byte_request(blk, offset, bytes);
1792    if (ret < 0) {
1793        return ret;
1794    }
1795
1796    return bdrv_co_pdiscard(blk->root, offset, bytes);
1797}
1798
1799static void coroutine_fn blk_aio_pdiscard_entry(void *opaque)
1800{
1801    BlkAioEmAIOCB *acb = opaque;
1802    BlkRwCo *rwco = &acb->rwco;
1803
1804    rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
1805    blk_aio_complete(acb);
1806}
1807
1808BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
1809                             int64_t offset, int64_t bytes,
1810                             BlockCompletionFunc *cb, void *opaque)
1811{
1812    IO_CODE();
1813    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
1814                        cb, opaque);
1815}
1816
1817int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
1818                                 int64_t bytes)
1819{
1820    int ret;
1821    IO_OR_GS_CODE();
1822
1823    blk_inc_in_flight(blk);
1824    ret = blk_co_do_pdiscard(blk, offset, bytes);
1825    blk_dec_in_flight(blk);
1826
1827    return ret;
1828}
1829
1830/* To be called between exactly one pair of blk_inc/dec_in_flight() */
1831static int coroutine_fn blk_co_do_flush(BlockBackend *blk)
1832{
1833    IO_CODE();
1834    blk_wait_while_drained(blk);
1835    GRAPH_RDLOCK_GUARD();
1836
1837    if (!blk_co_is_available(blk)) {
1838        return -ENOMEDIUM;
1839    }
1840
1841    return bdrv_co_flush(blk_bs(blk));
1842}
1843
1844static void coroutine_fn blk_aio_flush_entry(void *opaque)
1845{
1846    BlkAioEmAIOCB *acb = opaque;
1847    BlkRwCo *rwco = &acb->rwco;
1848
1849    rwco->ret = blk_co_do_flush(rwco->blk);
1850    blk_aio_complete(acb);
1851}
1852
1853BlockAIOCB *blk_aio_flush(BlockBackend *blk,
1854                          BlockCompletionFunc *cb, void *opaque)
1855{
1856    IO_CODE();
1857    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
1858}
1859
1860int coroutine_fn blk_co_flush(BlockBackend *blk)
1861{
1862    int ret;
1863    IO_OR_GS_CODE();
1864
1865    blk_inc_in_flight(blk);
1866    ret = blk_co_do_flush(blk);
1867    blk_dec_in_flight(blk);
1868
1869    return ret;
1870}
1871
1872static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
1873{
1874    BlkAioEmAIOCB *acb = opaque;
1875    BlkRwCo *rwco = &acb->rwco;
1876
1877    rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
1878                                   (unsigned int*)(uintptr_t)acb->bytes,
1879                                   rwco->iobuf);
1880    blk_aio_complete(acb);
1881}
1882
1883BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
1884                                unsigned int *nr_zones,
1885                                BlockZoneDescriptor  *zones,
1886                                BlockCompletionFunc *cb, void *opaque)
1887{
1888    BlkAioEmAIOCB *acb;
1889    Coroutine *co;
1890    IO_CODE();
1891
1892    blk_inc_in_flight(blk);
1893    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1894    acb->rwco = (BlkRwCo) {
1895        .blk    = blk,
1896        .offset = offset,
1897        .iobuf  = zones,
1898        .ret    = NOT_DONE,
1899    };
1900    acb->bytes = (int64_t)(uintptr_t)nr_zones;
1901    acb->has_returned = false;
1902
1903    co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
1904    aio_co_enter(blk_get_aio_context(blk), co);
1905
1906    acb->has_returned = true;
1907    if (acb->rwco.ret != NOT_DONE) {
1908        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
1909                                         blk_aio_complete_bh, acb);
1910    }
1911
1912    return &acb->common;
1913}
1914
1915static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
1916{
1917    BlkAioEmAIOCB *acb = opaque;
1918    BlkRwCo *rwco = &acb->rwco;
1919
1920    rwco->ret = blk_co_zone_mgmt(rwco->blk,
1921                                 (BlockZoneOp)(uintptr_t)rwco->iobuf,
1922                                 rwco->offset, acb->bytes);
1923    blk_aio_complete(acb);
1924}
1925
1926BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
1927                              int64_t offset, int64_t len,
1928                              BlockCompletionFunc *cb, void *opaque)
{
1929    BlkAioEmAIOCB *acb;
1930    Coroutine *co;
1931    IO_CODE();
1932
1933    blk_inc_in_flight(blk);
1934    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1935    acb->rwco = (BlkRwCo) {
1936        .blk    = blk,
1937        .offset = offset,
1938        .iobuf  = (void *)(uintptr_t)op,
1939        .ret    = NOT_DONE,
1940    };
1941    acb->bytes = len;
1942    acb->has_returned = false;
1943
1944    co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
1945    aio_co_enter(blk_get_aio_context(blk), co);
1946
1947    acb->has_returned = true;
1948    if (acb->rwco.ret != NOT_DONE) {
1949        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
1950                                         blk_aio_complete_bh, acb);
1951    }
1952
1953    return &acb->common;
1954}
1955
1956static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
1957{
1958    BlkAioEmAIOCB *acb = opaque;
1959    BlkRwCo *rwco = &acb->rwco;
1960
1961    rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes,
1962                                   rwco->iobuf, rwco->flags);
1963    blk_aio_complete(acb);
1964}
1965
1966BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
1967                                QEMUIOVector *qiov, BdrvRequestFlags flags,
1968                                BlockCompletionFunc *cb, void *opaque)
{
1969    BlkAioEmAIOCB *acb;
1970    Coroutine *co;
1971    IO_CODE();
1972
1973    blk_inc_in_flight(blk);
1974    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1975    acb->rwco = (BlkRwCo) {
1976        .blk    = blk,
1977        .ret    = NOT_DONE,
1978        .flags  = flags,
1979        .iobuf  = qiov,
1980    };
1981    acb->bytes = (int64_t)(uintptr_t)offset;
1982    acb->has_returned = false;
1983
1984    co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
1985    aio_co_enter(blk_get_aio_context(blk), co);
1986    acb->has_returned = true;
1987    if (acb->rwco.ret != NOT_DONE) {
1988        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
1989                                         blk_aio_complete_bh, acb);
1990    }
1991
1992    return &acb->common;
1993}
1994
1995/*
1996 * Send a zone_report command.
1997 * offset is a byte offset from the start of the device. No alignment
1998 * required for offset.
1999 * nr_zones is an IN/OUT parameter: on input it is the maximum number of
 * zones to report, on output the number of zones actually reported.
2000 */
2001int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
2002                                    unsigned int *nr_zones,
2003                                    BlockZoneDescriptor *zones)
2004{
2005    int ret;
2006    IO_CODE();
2007
2008    blk_inc_in_flight(blk); /* increase before waiting */
2009    blk_wait_while_drained(blk);
2010    GRAPH_RDLOCK_GUARD();
2011    if (!blk_is_available(blk)) {
2012        blk_dec_in_flight(blk);
2013        return -ENOMEDIUM;
2014    }
2015    ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
2016    blk_dec_in_flight(blk);
2017    return ret;
2018}
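
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * how a coroutine caller might use blk_co_zone_report().  The zone-array
 * size of 16 is an arbitrary assumption made for the example.
 */
#if 0
static int coroutine_fn example_zone_report(BlockBackend *blk, int64_t offset)
{
    BlockZoneDescriptor zones[16];
    unsigned int nr_zones = ARRAY_SIZE(zones); /* IN: capacity of zones[] */
    int ret = blk_co_zone_report(blk, offset, &nr_zones, zones);

    if (ret < 0) {
        return ret;
    }
    /* OUT: nr_zones now holds the number of descriptors actually filled */
    return nr_zones;
}
#endif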
2019
2020/*
2021 * Send a zone_management command.
2022 * op is the zone operation;
2023 * offset is the byte offset from the start of the zoned device;
2024 * len is the maximum number of bytes the command should operate on. It
2025 * should be aligned with the device zone size.
2026 */
2027int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
2028        int64_t offset, int64_t len)
2029{
2030    int ret;
2031    IO_CODE();
2032
2033    blk_inc_in_flight(blk);
2034    blk_wait_while_drained(blk);
2035    GRAPH_RDLOCK_GUARD();
2036
2037    ret = blk_check_byte_request(blk, offset, len);
2038    if (ret < 0) {
2039        blk_dec_in_flight(blk);
2040        return ret;
2041    }
2042
2043    ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
2044    blk_dec_in_flight(blk);
2045    return ret;
2046}
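
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * resetting a single zone with blk_co_zone_mgmt().  It assumes the
 * BLK_ZO_RESET op from block-common.h and that zone_size is the device's
 * zone size in bytes.
 */
#if 0
static int coroutine_fn example_zone_reset(BlockBackend *blk,
                                           int64_t zone_start,
                                           int64_t zone_size)
{
    /* len must be aligned to the zone size, as documented above */
    return blk_co_zone_mgmt(blk, BLK_ZO_RESET, zone_start, zone_size);
}
#endif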
2047
2048/*
2049 * Send a zone_append command.
2050 */
2051int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
2052        QEMUIOVector *qiov, BdrvRequestFlags flags)
2053{
2054    int ret;
2055    IO_CODE();
2056
2057    blk_inc_in_flight(blk);
2058    blk_wait_while_drained(blk);
2059    GRAPH_RDLOCK_GUARD();
2060    if (!blk_is_available(blk)) {
2061        blk_dec_in_flight(blk);
2062        return -ENOMEDIUM;
2063    }
2064
2065    ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
2066    blk_dec_in_flight(blk);
2067    return ret;
2068}
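
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * appending one buffer to a zone with blk_co_zone_append().  It assumes the
 * usual zone-append semantics, i.e. *offset is passed in as the target
 * zone's start offset and is updated by the driver to the position where
 * the data actually landed.
 */
#if 0
static int coroutine_fn example_zone_append(BlockBackend *blk,
                                            int64_t zone_start,
                                            void *buf, size_t len,
                                            int64_t *written_offset)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, len);
    int64_t offset = zone_start;
    int ret = blk_co_zone_append(blk, &offset, &qiov, 0);

    *written_offset = offset; /* where the append landed, on success */
    return ret;
}
#endif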
2069
2070void blk_drain(BlockBackend *blk)
2071{
2072    BlockDriverState *bs = blk_bs(blk);
2073    GLOBAL_STATE_CODE();
2074
2075    if (bs) {
2076        bdrv_ref(bs);
2077        bdrv_drained_begin(bs);
2078    }
2079
2080    /* We may have -ENOMEDIUM completions in flight */
2081    AIO_WAIT_WHILE(blk_get_aio_context(blk),
2082                   qatomic_read(&blk->in_flight) > 0);
2083
2084    if (bs) {
2085        bdrv_drained_end(bs);
2086        bdrv_unref(bs);
2087    }
2088}
2089
2090void blk_drain_all(void)
2091{
2092    BlockBackend *blk = NULL;
2093
2094    GLOBAL_STATE_CODE();
2095
2096    bdrv_drain_all_begin();
2097
2098    while ((blk = blk_all_next(blk)) != NULL) {
2099        /* We may have -ENOMEDIUM completions in flight */
2100        AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0);
2101    }
2102
2103    bdrv_drain_all_end();
2104}
2105
2106void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
2107                      BlockdevOnError on_write_error)
2108{
2109    GLOBAL_STATE_CODE();
2110    blk->on_read_error = on_read_error;
2111    blk->on_write_error = on_write_error;
2112}
2113
2114BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
2115{
2116    IO_CODE();
2117    return is_read ? blk->on_read_error : blk->on_write_error;
2118}
2119
2120BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
2121                                      int error)
2122{
2123    BlockdevOnError on_err = blk_get_on_error(blk, is_read);
2124    IO_CODE();
2125
2126    switch (on_err) {
2127    case BLOCKDEV_ON_ERROR_ENOSPC:
2128        return (error == ENOSPC) ?
2129               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2130    case BLOCKDEV_ON_ERROR_STOP:
2131        return BLOCK_ERROR_ACTION_STOP;
2132    case BLOCKDEV_ON_ERROR_REPORT:
2133        return BLOCK_ERROR_ACTION_REPORT;
2134    case BLOCKDEV_ON_ERROR_IGNORE:
2135        return BLOCK_ERROR_ACTION_IGNORE;
2136    case BLOCKDEV_ON_ERROR_AUTO:
2137    default:
2138        abort();
2139    }
2140}
2141
2142static void send_qmp_error_event(BlockBackend *blk,
2143                                 BlockErrorAction action,
2144                                 bool is_read, int error)
2145{
2146    IoOperationType optype;
2147    BlockDriverState *bs = blk_bs(blk);
2148
2149    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2150    qapi_event_send_block_io_error(blk_name(blk),
2151                                   bs ? bdrv_get_node_name(bs) : NULL, optype,
2152                                   action, blk_iostatus_is_enabled(blk),
2153                                   error == ENOSPC, strerror(error));
2154}
2155
2156/* This is done by device models because, while the block layer knows
2157 * about the error, it does not know whether an operation comes from
2158 * the device or the block layer (from a job, for example).
2159 */
2160void blk_error_action(BlockBackend *blk, BlockErrorAction action,
2161                      bool is_read, int error)
2162{
2163    assert(error >= 0);
2164    IO_CODE();
2165
2166    if (action == BLOCK_ERROR_ACTION_STOP) {
2167        /* First set the iostatus, so that "info block" returns an iostatus
2168         * that matches the events raised so far (an additional error iostatus
2169         * is fine, but not a lost one).
2170         */
2171        blk_iostatus_set_err(blk, error);
2172
2173        /* Then raise the request to stop the VM and the event.
2174         * qemu_system_vmstop_request_prepare has two effects.  First,
2175         * it ensures that the STOP event always comes after the
2176         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2177         * can observe the STOP event and do a "cont" before the STOP
2178         * event is issued, the VM will not stop.  In this case, vm_start()
2179         * also ensures that the STOP/RESUME pair of events is emitted.
2180         */
2181        qemu_system_vmstop_request_prepare();
2182        send_qmp_error_event(blk, action, is_read, error);
2183        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2184    } else {
2185        send_qmp_error_event(blk, action, is_read, error);
2186    }
2187}
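
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the pattern a device model typically follows for a failed request,
 * combining blk_get_error_action() with blk_error_action().  'ret' is
 * assumed to be a negative errno value from the failed I/O.
 */
#if 0
static BlockErrorAction example_handle_io_error(BlockBackend *blk,
                                                bool is_read, int ret)
{
    BlockErrorAction action = blk_get_error_action(blk, is_read, -ret);

    /* blk_error_action() expects a positive errno value */
    blk_error_action(blk, action, is_read, -ret);

    /*
     * The device would then report the error to the guest only for
     * BLOCK_ERROR_ACTION_REPORT, and requeue the request for
     * BLOCK_ERROR_ACTION_STOP so it can be retried after the VM resumes.
     */
    return action;
}
#endif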
2188
2189/*
2190 * Returns true if the BlockBackend can support taking write permissions
2191 * (because its root node is not read-only).
2192 */
2193bool blk_supports_write_perm(BlockBackend *blk)
2194{
2195    BlockDriverState *bs = blk_bs(blk);
2196    GLOBAL_STATE_CODE();
2197
2198    if (bs) {
2199        return !bdrv_is_read_only(bs);
2200    } else {
2201        return blk->root_state.open_flags & BDRV_O_RDWR;
2202    }
2203}
2204
2205/*
2206 * Returns true if the BlockBackend can be written to in its current
2207 * configuration (i.e. if write permissions have been requested)
2208 */
2209bool blk_is_writable(BlockBackend *blk)
2210{
2211    IO_CODE();
2212    return blk->perm & BLK_PERM_WRITE;
2213}
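
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a guest write path would normally check blk_is_writable() (was
 * BLK_PERM_WRITE actually requested?) rather than blk_supports_write_perm()
 * (is the root node merely capable of being written?).
 */
#if 0
static int example_guest_write_check(BlockBackend *blk)
{
    if (!blk_is_writable(blk)) {
        return -EPERM; /* read-only attachment, reject the guest write */
    }
    return 0;
}
#endif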
2214
2215bool blk_is_sg(BlockBackend *blk)
2216{
2217    BlockDriverState *bs = blk_bs(blk);
2218    GLOBAL_STATE_CODE();
2219
2220    if (!bs) {
2221        return false;
2222    }
2223
2224    return bdrv_is_sg(bs);
2225}
2226
2227bool blk_enable_write_cache(BlockBackend *blk)
2228{
2229    IO_CODE();
2230    return blk->enable_write_cache;
2231}
2232
2233void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
2234{
2235    IO_CODE();
2236    blk->enable_write_cache = wce;
2237}
2238
2239void blk_activate(BlockBackend *blk, Error **errp)
2240{
2241    BlockDriverState *bs = blk_bs(blk);
2242    GLOBAL_STATE_CODE();
2243
2244    if (!bs) {
2245        error_setg(errp, "Device '%s' has no medium", blk->name);
2246        return;
2247    }
2248
2249    /*
2250     * Migration code can call this function in coroutine context, so leave
2251     * coroutine context if necessary.
2252     */
2253    if (qemu_in_coroutine()) {
2254        bdrv_co_activate(bs, errp);
2255    } else {
2256        bdrv_activate(bs, errp);
2257    }
2258}
2259
2260bool coroutine_fn blk_co_is_inserted(BlockBackend *blk)
2261{
2262    BlockDriverState *bs = blk_bs(blk);
2263    IO_CODE();
2264    assert_bdrv_graph_readable();
2265
2266    return bs && bdrv_co_is_inserted(bs);
2267}
2268
2269bool coroutine_fn blk_co_is_available(BlockBackend *blk)
2270{
2271    IO_CODE();
2272    return blk_co_is_inserted(blk) && !blk_dev_is_tray_open(blk);
2273}
2274
2275void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked)
2276{
2277    BlockDriverState *bs = blk_bs(blk);
2278    IO_CODE();
2279    GRAPH_RDLOCK_GUARD();
2280
2281    if (bs) {
2282        bdrv_co_lock_medium(bs, locked);
2283    }
2284}
2285
2286void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag)
2287{
2288    BlockDriverState *bs = blk_bs(blk);
2289    char *id;
2290    IO_CODE();
2291    GRAPH_RDLOCK_GUARD();
2292
2293    if (bs) {
2294        bdrv_co_eject(bs, eject_flag);
2295    }
2296
2297    /* Whether or not we ejected on the backend,
2298     * the frontend experienced a tray event. */
2299    id = blk_get_attached_dev_id(blk);
2300    qapi_event_send_device_tray_moved(blk_name(blk), id,
2301                                      eject_flag);
2302    g_free(id);
2303}
2304
2305int blk_get_flags(BlockBackend *blk)
2306{
2307    BlockDriverState *bs = blk_bs(blk);
2308    GLOBAL_STATE_CODE();
2309
2310    if (bs) {
2311        return bdrv_get_flags(bs);
2312    } else {
2313        return blk->root_state.open_flags;
2314    }
2315}
2316
2317/* Returns the minimum request alignment, in bytes; guaranteed nonzero */
2318uint32_t blk_get_request_alignment(BlockBackend *blk)
2319{
2320    BlockDriverState *bs = blk_bs(blk);
2321    IO_CODE();
2322    return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
2323}
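
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * widening a request to the backend's request alignment with the
 * QEMU_ALIGN_* helpers from osdep.h.
 */
#if 0
static void example_align_request(BlockBackend *blk,
                                  int64_t *offset, int64_t *bytes)
{
    uint32_t align = blk_get_request_alignment(blk);
    int64_t end = QEMU_ALIGN_UP(*offset + *bytes, align);

    *offset = QEMU_ALIGN_DOWN(*offset, align);
    *bytes = end - *offset;
}
#endif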
2324
2325/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */
2326uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
2327{
2328    BlockDriverState *bs = blk_bs(blk);
2329    uint64_t max = INT_MAX;
2330    IO_CODE();
2331
2332    if (bs) {
2333        max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
2334        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
2335    }
2336    return ROUND_DOWN(max, blk_get_request_alignment(blk));
2337}
2338
2339/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
2340uint32_t blk_get_max_transfer(BlockBackend *blk)
2341{
2342    BlockDriverState *bs = blk_bs(blk);
2343    uint32_t max = INT_MAX;
2344    IO_CODE();
2345
2346    if (bs) {
2347        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
2348    }
2349    return ROUND_DOWN(max, blk_get_request_alignment(blk));
2350}
2351
2352int blk_get_max_hw_iov(BlockBackend *blk)
2353{
2354    IO_CODE();
2355    return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
2356                        blk->root->bs->bl.max_iov);
2357}
2358
2359int blk_get_max_iov(BlockBackend *blk)
2360{
2361    IO_CODE();
2362    return blk->root->bs->bl.max_iov;
2363}
2364
2365void *blk_try_blockalign(BlockBackend *blk, size_t size)
2366{
2367    IO_CODE();
2368    return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
2369}
2370
2371void *blk_blockalign(BlockBackend *blk, size_t size)
2372{
2373    IO_CODE();
2374    return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
2375}
2376
2377bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
2378{
2379    BlockDriverState *bs = blk_bs(blk);
2380    GLOBAL_STATE_CODE();
2381
2382    if (!bs) {
2383        return false;
2384    }
2385
2386    return bdrv_op_is_blocked(bs, op, errp);
2387}
2388
2389void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
2390{
2391    BlockDriverState *bs = blk_bs(blk);
2392    GLOBAL_STATE_CODE();
2393
2394    if (bs) {
2395        bdrv_op_unblock(bs, op, reason);
2396    }
2397}
2398
2399void blk_op_block_all(BlockBackend *blk, Error *reason)
2400{
2401    BlockDriverState *bs = blk_bs(blk);
2402    GLOBAL_STATE_CODE();
2403
2404    if (bs) {
2405        bdrv_op_block_all(bs, reason);
2406    }
2407}
2408
2409void blk_op_unblock_all(BlockBackend *blk, Error *reason)
2410{
2411    BlockDriverState *bs = blk_bs(blk);
2412    GLOBAL_STATE_CODE();
2413
2414    if (bs) {
2415        bdrv_op_unblock_all(bs, reason);
2416    }
2417}
2418
2419AioContext *blk_get_aio_context(BlockBackend *blk)
2420{
2421    BlockDriverState *bs;
2422    IO_CODE();
2423
2424    if (!blk) {
2425        return qemu_get_aio_context();
2426    }
2427
2428    bs = blk_bs(blk);
2429    if (bs) {
2430        AioContext *ctx = bdrv_get_aio_context(bs);
2431        assert(ctx == blk->ctx);
2432    }
2433
2434    return blk->ctx;
2435}
2436
2437static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
2438{
2439    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
2440    return blk_get_aio_context(blk_acb->blk);
2441}
2442
2443int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
2444                        Error **errp)
2445{
2446    bool old_allow_change;
2447    BlockDriverState *bs = blk_bs(blk);
2448    int ret;
2449
2450    GLOBAL_STATE_CODE();
2451
2452    if (!bs) {
2453        blk->ctx = new_context;
2454        return 0;
2455    }
2456
2457    bdrv_ref(bs);
2458
2459    old_allow_change = blk->allow_aio_context_change;
2460    blk->allow_aio_context_change = true;
2461
2462    ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);
2463
2464    blk->allow_aio_context_change = old_allow_change;
2465
2466    bdrv_unref(bs);
2467    return ret;
2468}
2469
2470typedef struct BdrvStateBlkRootContext {
2471    AioContext *new_ctx;
2472    BlockBackend *blk;
2473} BdrvStateBlkRootContext;
2474
2475static void blk_root_set_aio_ctx_commit(void *opaque)
2476{
2477    BdrvStateBlkRootContext *s = opaque;
2478    BlockBackend *blk = s->blk;
2479    AioContext *new_context = s->new_ctx;
2480    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2481
2482    blk->ctx = new_context;
2483    if (tgm->throttle_state) {
2484        throttle_group_detach_aio_context(tgm);
2485        throttle_group_attach_aio_context(tgm, new_context);
2486    }
2487}
2488
2489static TransactionActionDrv set_blk_root_context = {
2490    .commit = blk_root_set_aio_ctx_commit,
2491    .clean = g_free,
2492};
2493
2494static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
2495                                    GHashTable *visited, Transaction *tran,
2496                                    Error **errp)
2497{
2498    BlockBackend *blk = child->opaque;
2499    BdrvStateBlkRootContext *s;
2500
2501    if (!blk->allow_aio_context_change) {
2502        /*
2503         * Manually created BlockBackends (those with a name) that are not
2504         * attached to anything can change their AioContext without updating
2505         * their user; return an error for others.
2506         */
2507        if (!blk->name || blk->dev) {
2508            /* TODO Add BB name/QOM path */
2509            error_setg(errp, "Cannot change iothread of active block backend");
2510            return false;
2511        }
2512    }
2513
2514    s = g_new(BdrvStateBlkRootContext, 1);
2515    *s = (BdrvStateBlkRootContext) {
2516        .new_ctx = ctx,
2517        .blk = blk,
2518    };
2519
2520    tran_add(tran, &set_blk_root_context, s);
2521    return true;
2522}
2523
2524void blk_add_aio_context_notifier(BlockBackend *blk,
2525        void (*attached_aio_context)(AioContext *new_context, void *opaque),
2526        void (*detach_aio_context)(void *opaque), void *opaque)
2527{
2528    BlockBackendAioNotifier *notifier;
2529    BlockDriverState *bs = blk_bs(blk);
2530    GLOBAL_STATE_CODE();
2531
2532    notifier = g_new(BlockBackendAioNotifier, 1);
2533    notifier->attached_aio_context = attached_aio_context;
2534    notifier->detach_aio_context = detach_aio_context;
2535    notifier->opaque = opaque;
2536    QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
2537
2538    if (bs) {
2539        bdrv_add_aio_context_notifier(bs, attached_aio_context,
2540                                      detach_aio_context, opaque);
2541    }
2542}
2543
2544void blk_remove_aio_context_notifier(BlockBackend *blk,
2545                                     void (*attached_aio_context)(AioContext *,
2546                                                                  void *),
2547                                     void (*detach_aio_context)(void *),
2548                                     void *opaque)
2549{
2550    BlockBackendAioNotifier *notifier;
2551    BlockDriverState *bs = blk_bs(blk);
2552
2553    GLOBAL_STATE_CODE();
2554
2555    if (bs) {
2556        bdrv_remove_aio_context_notifier(bs, attached_aio_context,
2557                                         detach_aio_context, opaque);
2558    }
2559
2560    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
2561        if (notifier->attached_aio_context == attached_aio_context &&
2562            notifier->detach_aio_context == detach_aio_context &&
2563            notifier->opaque == opaque) {
2564            QLIST_REMOVE(notifier, list);
2565            g_free(notifier);
2566            return;
2567        }
2568    }
2569
2570    abort();
2571}
2572
2573void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
2574{
2575    GLOBAL_STATE_CODE();
2576    notifier_list_add(&blk->remove_bs_notifiers, notify);
2577}
2578
2579void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
2580{
2581    GLOBAL_STATE_CODE();
2582    notifier_list_add(&blk->insert_bs_notifiers, notify);
2583}
2584
2585BlockAcctStats *blk_get_stats(BlockBackend *blk)
2586{
2587    IO_CODE();
2588    return &blk->stats;
2589}
2590
2591void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
2592                  BlockCompletionFunc *cb, void *opaque)
2593{
2594    IO_CODE();
2595    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
2596}
2597
2598int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
2599                                      int64_t bytes, BdrvRequestFlags flags)
2600{
2601    IO_OR_GS_CODE();
2602    return blk_co_pwritev(blk, offset, bytes, NULL,
2603                          flags | BDRV_REQ_ZERO_WRITE);
2604}
2605
2606int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset,
2607                                          int64_t bytes, const void *buf)
2608{
2609    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
2610    IO_OR_GS_CODE();
2611    return blk_co_pwritev_part(blk, offset, bytes, &qiov, 0,
2612                               BDRV_REQ_WRITE_COMPRESSED);
2613}
2614
2615int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact,
2616                                 PreallocMode prealloc, BdrvRequestFlags flags,
2617                                 Error **errp)
2618{
2619    IO_OR_GS_CODE();
2620    GRAPH_RDLOCK_GUARD();
2621    if (!blk_co_is_available(blk)) {
2622        error_setg(errp, "No medium inserted");
2623        return -ENOMEDIUM;
2624    }
2625
2626    return bdrv_co_truncate(blk->root, offset, exact, prealloc, flags, errp);
2627}
2628
2629int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
2630                     int64_t pos, int size)
2631{
2632    int ret;
2633    GLOBAL_STATE_CODE();
2634
2635    if (!blk_is_available(blk)) {
2636        return -ENOMEDIUM;
2637    }
2638
2639    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
2640    if (ret < 0) {
2641        return ret;
2642    }
2643
2644    if (ret == size && !blk->enable_write_cache) {
2645        ret = bdrv_flush(blk_bs(blk));
2646    }
2647
2648    return ret < 0 ? ret : size;
2649}
2650
2651int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
2652{
2653    GLOBAL_STATE_CODE();
2654    if (!blk_is_available(blk)) {
2655        return -ENOMEDIUM;
2656    }
2657
2658    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
2659}
2660
2661int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
2662{
2663    GLOBAL_STATE_CODE();
2664    if (!blk_is_available(blk)) {
2665        return -ENOMEDIUM;
2666    }
2667
2668    return bdrv_probe_blocksizes(blk_bs(blk), bsz);
2669}
2670
2671int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
2672{
2673    GLOBAL_STATE_CODE();
2674    if (!blk_is_available(blk)) {
2675        return -ENOMEDIUM;
2676    }
2677
2678    return bdrv_probe_geometry(blk_bs(blk), geo);
2679}
2680
2681/*
2682 * Updates the BlockBackendRootState object with data from the currently
2683 * attached BlockDriverState.
2684 */
2685void blk_update_root_state(BlockBackend *blk)
2686{
2687    GLOBAL_STATE_CODE();
2688    assert(blk->root);
2689
2690    blk->root_state.open_flags    = blk->root->bs->open_flags;
2691    blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
2692}
2693
2694/*
2695 * Returns the detect-zeroes setting to be used for bdrv_open() of a
2696 * BlockDriverState which is supposed to inherit the root state.
2697 */
2698bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
2699{
2700    GLOBAL_STATE_CODE();
2701    return blk->root_state.detect_zeroes;
2702}
2703
2704/*
2705 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
2706 * supposed to inherit the root state.
2707 */
2708int blk_get_open_flags_from_root_state(BlockBackend *blk)
2709{
2710    GLOBAL_STATE_CODE();
2711    return blk->root_state.open_flags;
2712}
2713
2714BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
2715{
2716    GLOBAL_STATE_CODE();
2717    return &blk->root_state;
2718}
2719
2720int blk_commit_all(void)
2721{
2722    BlockBackend *blk = NULL;
2723    GLOBAL_STATE_CODE();
2724
2725    while ((blk = blk_all_next(blk)) != NULL) {
2726        AioContext *aio_context = blk_get_aio_context(blk);
2727        BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk));
2728
2729        aio_context_acquire(aio_context);
2730        if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) {
2731            int ret;
2732
2733            ret = bdrv_commit(unfiltered_bs);
2734            if (ret < 0) {
2735                aio_context_release(aio_context);
2736                return ret;
2737            }
2738        }
2739        aio_context_release(aio_context);
2740    }
2741    return 0;
2742}
2743
2744
2745/* throttling disk I/O limits */
2746void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
2747{
2748    GLOBAL_STATE_CODE();
2749    throttle_group_config(&blk->public.throttle_group_member, cfg);
2750}
2751
2752void blk_io_limits_disable(BlockBackend *blk)
2753{
2754    BlockDriverState *bs = blk_bs(blk);
2755    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2756    assert(tgm->throttle_state);
2757    GLOBAL_STATE_CODE();
2758    if (bs) {
2759        bdrv_ref(bs);
2760        bdrv_drained_begin(bs);
2761    }
2762    throttle_group_unregister_tgm(tgm);
2763    if (bs) {
2764        bdrv_drained_end(bs);
2765        bdrv_unref(bs);
2766    }
2767}
2768
2769/* should be called before blk_set_io_limits if a limit is set */
2770void blk_io_limits_enable(BlockBackend *blk, const char *group)
2771{
2772    assert(!blk->public.throttle_group_member.throttle_state);
2773    GLOBAL_STATE_CODE();
2774    throttle_group_register_tgm(&blk->public.throttle_group_member,
2775                                group, blk_get_aio_context(blk));
2776}
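
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * enabling throttling and then applying a limit, in the order required by
 * the comment above.  The group name and the 1000 IOPS figure are arbitrary
 * assumptions for the example.
 */
#if 0
static void example_enable_iops_limit(BlockBackend *blk)
{
    ThrottleConfig cfg;

    throttle_config_init(&cfg);
    cfg.buckets[THROTTLE_OPS_TOTAL].avg = 1000; /* total I/O ops per second */

    blk_io_limits_enable(blk, "example-group"); /* register with the group */
    blk_set_io_limits(blk, &cfg);               /* then apply the limits   */
}
#endif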
2777
2778void blk_io_limits_update_group(BlockBackend *blk, const char *group)
2779{
2780    GLOBAL_STATE_CODE();
2781    /* this BB is not part of any group */
2782    if (!blk->public.throttle_group_member.throttle_state) {
2783        return;
2784    }
2785
2786    /* this BB is already part of the same group as the one we want */
2787    if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
2788                group)) {
2789        return;
2790    }
2791
2792    /* need to change the group this BlockBackend belongs to */
2793    blk_io_limits_disable(blk);
2794    blk_io_limits_enable(blk, group);
2795}
2796
2797static void blk_root_drained_begin(BdrvChild *child)
2798{
2799    BlockBackend *blk = child->opaque;
2800    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2801
2802    if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) {
2803        if (blk->dev_ops && blk->dev_ops->drained_begin) {
2804            blk->dev_ops->drained_begin(blk->dev_opaque);
2805        }
2806    }
2807
2808    /* Note that blk->root may not be accessible here yet if we are just
2809     * attaching to a BlockDriverState that is drained. Use child instead. */
2810
2811    if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
2812        throttle_group_restart_tgm(tgm);
2813    }
2814}
2815
2816static bool blk_root_drained_poll(BdrvChild *child)
2817{
2818    BlockBackend *blk = child->opaque;
2819    bool busy = false;
2820    assert(qatomic_read(&blk->quiesce_counter));
2821
2822    if (blk->dev_ops && blk->dev_ops->drained_poll) {
2823        busy = blk->dev_ops->drained_poll(blk->dev_opaque);
2824    }
2825    return busy || !!blk->in_flight;
2826}
2827
2828static void blk_root_drained_end(BdrvChild *child)
2829{
2830    BlockBackend *blk = child->opaque;
2831    assert(qatomic_read(&blk->quiesce_counter));
2832
2833    assert(blk->public.throttle_group_member.io_limits_disabled);
2834    qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
2835
2836    if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) {
2837        if (blk->dev_ops && blk->dev_ops->drained_end) {
2838            blk->dev_ops->drained_end(blk->dev_opaque);
2839        }
2840        qemu_mutex_lock(&blk->queued_requests_lock);
2841        while (qemu_co_enter_next(&blk->queued_requests,
2842                                  &blk->queued_requests_lock)) {
2843            /* Resume all queued requests */
2844        }
2845        qemu_mutex_unlock(&blk->queued_requests_lock);
2846    }
2847}
2848
2849bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp)
2850{
2851    BlockDriverState *bs = blk_bs(blk);
2852
2853    GLOBAL_STATE_CODE();
2854
2855    if (bs) {
2856        return bdrv_register_buf(bs, host, size, errp);
2857    }
2858    return true;
2859}
2860
2861void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
2862{
2863    BlockDriverState *bs = blk_bs(blk);
2864
2865    GLOBAL_STATE_CODE();
2866
2867    if (bs) {
2868        bdrv_unregister_buf(bs, host, size);
2869    }
2870}
2871
2872int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
2873                                   BlockBackend *blk_out, int64_t off_out,
2874                                   int64_t bytes, BdrvRequestFlags read_flags,
2875                                   BdrvRequestFlags write_flags)
2876{
2877    int r;
2878    IO_CODE();
2879    GRAPH_RDLOCK_GUARD();
2880
2881    r = blk_check_byte_request(blk_in, off_in, bytes);
2882    if (r) {
2883        return r;
2884    }
2885    r = blk_check_byte_request(blk_out, off_out, bytes);
2886    if (r) {
2887        return r;
2888    }
2889
2890    return bdrv_co_copy_range(blk_in->root, off_in,
2891                              blk_out->root, off_out,
2892                              bytes, read_flags, write_flags);
2893}
2894
2895const BdrvChild *blk_root(BlockBackend *blk)
2896{
2897    GLOBAL_STATE_CODE();
2898    return blk->root;
2899}
2900
2901int blk_make_empty(BlockBackend *blk, Error **errp)
2902{
2903    GLOBAL_STATE_CODE();
2904    if (!blk_is_available(blk)) {
2905        error_setg(errp, "No medium inserted");
2906        return -ENOMEDIUM;
2907    }
2908
2909    return bdrv_make_empty(blk->root, errp);
2910}
2911