qemu/block/block-backend.c
/*
 * QEMU Block backends
 *
 * Copyright (C) 2014-2016 Red Hat, Inc.
 *
 * Authors:
 *  Markus Armbruster <armbru@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1
 * or later.  See the COPYING.LIB file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/throttle-groups.h"
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
#include "qapi-event.h"
#include "qemu/id.h"
#include "trace.h"

/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);

struct BlockBackend {
    char *name;
    int refcnt;
    BdrvChild *root;
    DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
    QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
    QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
    BlockBackendPublic public;

    void *dev;                  /* attached device model, if any */
    /* TODO change to DeviceState when all users are qdevified */
    const BlockDevOps *dev_ops;
    void *dev_opaque;

    /* the block size for which the guest device expects atomicity */
    int guest_block_size;

    /* If the BDS tree is removed, some of its options are stored here (which
     * can be used to restore those options in the new BDS on insert) */
    BlockBackendRootState root_state;

    bool enable_write_cache;

    /* I/O stats (display with "info blockstats"). */
    BlockAcctStats stats;

    BlockdevOnError on_read_error, on_write_error;
    bool iostatus_enabled;
    BlockDeviceIoStatus iostatus;

    bool allow_write_beyond_eof;

    NotifierList remove_bs_notifiers, insert_bs_notifiers;
};

typedef struct BlockBackendAIOCB {
    BlockAIOCB common;
    QEMUBH *bh;
    BlockBackend *blk;
    int ret;
} BlockBackendAIOCB;

static const AIOCBInfo block_backend_aiocb_info = {
    .get_aio_context = blk_aiocb_get_aio_context,
    .aiocb_size = sizeof(BlockBackendAIOCB),
};

static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);

/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
    QTAILQ_HEAD_INITIALIZER(block_backends);

/* All BlockBackends referenced by the monitor and which are iterated through by
 * blk_next() */
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
    QTAILQ_HEAD_INITIALIZER(monitor_block_backends);

static void blk_root_inherit_options(int *child_flags, QDict *child_options,
                                     int parent_flags, QDict *parent_options)
{
    /* We're not supposed to call this function for root nodes */
    abort();
}

static void blk_root_drained_begin(BdrvChild *child);
static void blk_root_drained_end(BdrvChild *child);

static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);

static const char *blk_root_get_name(BdrvChild *child)
{
    return blk_name(child->opaque);
}

static const BdrvChildRole child_root = {
    .inherit_options    = blk_root_inherit_options,

    .change_media       = blk_root_change_media,
    .resize             = blk_root_resize,
    .get_name           = blk_root_get_name,

    .drained_begin      = blk_root_drained_begin,
    .drained_end        = blk_root_drained_end,
};
/*
 * Create a new BlockBackend with a reference count of one.
 * This function cannot fail; it always returns the new BlockBackend.
 */
BlockBackend *blk_new(void)
{
    BlockBackend *blk;

    blk = g_new0(BlockBackend, 1);
    blk->refcnt = 1;
    blk_set_enable_write_cache(blk, true);

    qemu_co_queue_init(&blk->public.throttled_reqs[0]);
    qemu_co_queue_init(&blk->public.throttled_reqs[1]);

    notifier_list_init(&blk->remove_bs_notifiers);
    notifier_list_init(&blk->insert_bs_notifiers);

    QTAILQ_INSERT_TAIL(&block_backends, blk, link);
    return blk;
}

/*
 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
 *
 * Just as with bdrv_open(), after having called this function the reference to
 * @options belongs to the block layer (even on failure).
 *
 * TODO: Remove @filename and @flags; it should be possible to specify a whole
 * BDS tree just by specifying the @options QDict (or @reference,
 * alternatively). At the time of adding this function, this is not possible,
 * though, so callers of this function have to be able to specify @filename and
 * @flags.
 */
BlockBackend *blk_new_open(const char *filename, const char *reference,
                           QDict *options, int flags, Error **errp)
{
    BlockBackend *blk;
    BlockDriverState *bs;

    blk = blk_new();
    bs = bdrv_open(filename, reference, options, flags, errp);
    if (!bs) {
        blk_unref(blk);
        return NULL;
    }

    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);

    return blk;
}
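
/*
 * A minimal usage sketch (illustrative only; "test.qcow2" and the error
 * handling policy are hypothetical): callers typically pair blk_new_open()
 * with blk_unref() like
 *
 *     Error *local_err = NULL;
 *     BlockBackend *blk;
 *
 *     blk = blk_new_open("test.qcow2", NULL, NULL, BDRV_O_RDWR, &local_err);
 *     if (!blk) {
 *         error_report_err(local_err);
 *         return;
 *     }
 *     ...I/O through blk...
 *     blk_unref(blk);
 */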

static void blk_delete(BlockBackend *blk)
{
    assert(!blk->refcnt);
    assert(!blk->name);
    assert(!blk->dev);
    if (blk->root) {
        blk_remove_bs(blk);
    }
    assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
    assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
    QTAILQ_REMOVE(&block_backends, blk, link);
    drive_info_del(blk->legacy_dinfo);
    block_acct_cleanup(&blk->stats);
    g_free(blk);
}

static void drive_info_del(DriveInfo *dinfo)
{
    if (!dinfo) {
        return;
    }
    qemu_opts_del(dinfo->opts);
    g_free(dinfo->serial);
    g_free(dinfo);
}

int blk_get_refcnt(BlockBackend *blk)
{
    return blk ? blk->refcnt : 0;
}

/*
 * Increment @blk's reference count.
 * @blk must not be null.
 */
void blk_ref(BlockBackend *blk)
{
    blk->refcnt++;
}

/*
 * Decrement @blk's reference count.
 * If this drops it to zero, destroy @blk.
 * For convenience, do nothing if @blk is null.
 */
void blk_unref(BlockBackend *blk)
{
    if (blk) {
        assert(blk->refcnt > 0);
        if (!--blk->refcnt) {
            blk_delete(blk);
        }
    }
}

/*
 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
 * ones which are hidden (i.e. are not referenced by the monitor).
 */
static BlockBackend *blk_all_next(BlockBackend *blk)
{
    return blk ? QTAILQ_NEXT(blk, link)
               : QTAILQ_FIRST(&block_backends);
}

void blk_remove_all_bs(void)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *ctx = blk_get_aio_context(blk);

        aio_context_acquire(ctx);
        if (blk->root) {
            blk_remove_bs(blk);
        }
        aio_context_release(ctx);
    }
}

/*
 * Return the monitor-owned BlockBackend after @blk.
 * If @blk is null, return the first one.
 * Else, return @blk's next sibling, which may be null.
 *
 * To iterate over all BlockBackends, do
 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 *     ...
 * }
 */
BlockBackend *blk_next(BlockBackend *blk)
{
    return blk ? QTAILQ_NEXT(blk, monitor_link)
               : QTAILQ_FIRST(&monitor_block_backends);
}

/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
 * the monitor or attached to a BlockBackend */
BlockDriverState *bdrv_next(BdrvNextIterator *it)
{
    BlockDriverState *bs;

    /* First, return all root nodes of BlockBackends. In order to avoid
     * returning a BDS twice when multiple BBs refer to it, we only return it
     * if the BB is the first one in the parent list of the BDS. */
    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
        do {
            it->blk = blk_all_next(it->blk);
            bs = it->blk ? blk_bs(it->blk) : NULL;
        } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));

        if (bs) {
            return bs;
        }
        it->phase = BDRV_NEXT_MONITOR_OWNED;
    }

    /* Then return the monitor-owned BDSes without a BB attached. Ignore all
     * BDSes that are attached to a BlockBackend here; they have been handled
     * by the above block already */
    do {
        it->bs = bdrv_next_monitor_owned(it->bs);
        bs = it->bs;
    } while (bs && bdrv_has_blk(bs));

    return bs;
}

BlockDriverState *bdrv_first(BdrvNextIterator *it)
{
    *it = (BdrvNextIterator) {
        .phase = BDRV_NEXT_BACKEND_ROOTS,
    };

    return bdrv_next(it);
}
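
/*
 * Illustrative sketch: the intended iteration pattern over all top-level
 * BDSes combines bdrv_first() and bdrv_next() like
 *
 *     BdrvNextIterator it;
 *     BlockDriverState *bs;
 *
 *     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
 *         ...examine bs...
 *     }
 */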

/*
 * Add a BlockBackend into the list of backends referenced by the monitor, with
 * the given @name acting as the handle for the monitor.
 * Strictly for use by blockdev.c.
 *
 * @name must not be null or empty.
 *
 * Returns true on success and false on failure. In the latter case, an Error
 * object is returned through @errp.
 */
bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
{
    assert(!blk->name);
    assert(name && name[0]);

    if (!id_wellformed(name)) {
        error_setg(errp, "Invalid device name");
        return false;
    }
    if (blk_by_name(name)) {
        error_setg(errp, "Device with id '%s' already exists", name);
        return false;
    }
    if (bdrv_find_node(name)) {
        error_setg(errp,
                   "Device name '%s' conflicts with an existing node name",
                   name);
        return false;
    }

    blk->name = g_strdup(name);
    QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
    return true;
}
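
/*
 * Illustrative sketch (the QemuOpts variable is hypothetical): blockdev.c
 * would register a monitor name right after creating the backend and drop
 * its reference on failure, along the lines of
 *
 *     if (!monitor_add_blk(blk, qemu_opts_id(opts), errp)) {
 *         blk_unref(blk);
 *         return NULL;
 *     }
 */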

/*
 * Remove a BlockBackend from the list of backends referenced by the monitor.
 * Strictly for use by blockdev.c.
 */
void monitor_remove_blk(BlockBackend *blk)
{
    if (!blk->name) {
        return;
    }

    QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
    g_free(blk->name);
    blk->name = NULL;
}

/*
 * Return @blk's name, a non-null string.
 * Returns an empty string iff @blk is not referenced by the monitor.
 */
const char *blk_name(BlockBackend *blk)
{
    return blk->name ?: "";
}

/*
 * Return the BlockBackend with name @name if it exists, else null.
 * @name must not be null.
 */
BlockBackend *blk_by_name(const char *name)
{
    BlockBackend *blk = NULL;

    assert(name);
    while ((blk = blk_next(blk)) != NULL) {
        if (!strcmp(name, blk->name)) {
            return blk;
        }
    }
    return NULL;
}

/*
 * Return the BlockDriverState attached to @blk if any, else null.
 */
BlockDriverState *blk_bs(BlockBackend *blk)
{
    return blk->root ? blk->root->bs : NULL;
}

static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
{
    BdrvChild *child;
    QLIST_FOREACH(child, &bs->parents, next_parent) {
        if (child->role == &child_root) {
            return child->opaque;
        }
    }

    return NULL;
}

/*
 * Returns true if @bs has an associated BlockBackend.
 */
bool bdrv_has_blk(BlockDriverState *bs)
{
    return bdrv_first_blk(bs) != NULL;
}

/*
 * Return @blk's DriveInfo if any, else null.
 */
DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
{
    return blk->legacy_dinfo;
}

/*
 * Set @blk's DriveInfo to @dinfo, and return it.
 * @blk must not have a DriveInfo set already.
 * No other BlockBackend may have the same DriveInfo set.
 */
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
{
    assert(!blk->legacy_dinfo);
    return blk->legacy_dinfo = dinfo;
}

/*
 * Return the BlockBackend with DriveInfo @dinfo.
 * It must exist.
 */
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_next(blk)) != NULL) {
        if (blk->legacy_dinfo == dinfo) {
            return blk;
        }
    }
    abort();
}

/*
 * Returns a pointer to the publicly accessible fields of @blk.
 */
BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
    return &blk->public;
}

/*
 * Returns a BlockBackend given the associated @public fields.
 */
BlockBackend *blk_by_public(BlockBackendPublic *public)
{
    return container_of(public, BlockBackend, public);
}

/*
 * Disassociates the currently associated BlockDriverState from @blk.
 */
void blk_remove_bs(BlockBackend *blk)
{
    notifier_list_notify(&blk->remove_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_detach_aio_context(&blk->public.throttle_timers);
    }

    blk_update_root_state(blk);

    bdrv_root_unref_child(blk->root);
    blk->root = NULL;
}

/*
 * Associates a new BlockDriverState with @blk.
 */
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
{
    bdrv_ref(bs);
    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);

    notifier_list_notify(&blk->insert_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_attach_aio_context(
            &blk->public.throttle_timers, bdrv_get_aio_context(bs));
    }
}

/*
 * Attach device model @dev to @blk.
 * Return 0 on success, -EBUSY when a device model is attached already.
 */
int blk_attach_dev(BlockBackend *blk, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (blk->dev) {
        return -EBUSY;
    }
    blk_ref(blk);
    blk->dev = dev;
    blk_iostatus_reset(blk);
    return 0;
}

/*
 * Attach device model @dev to @blk.
 * @blk must not have a device model attached already.
 * TODO qdevified devices don't use this, remove when devices are qdevified
 */
void blk_attach_dev_nofail(BlockBackend *blk, void *dev)
{
    if (blk_attach_dev(blk, dev) < 0) {
        abort();
    }
}

/*
 * Detach device model @dev from @blk.
 * @dev must be currently attached to @blk.
 */
void blk_detach_dev(BlockBackend *blk, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(blk->dev == dev);
    blk->dev = NULL;
    blk->dev_ops = NULL;
    blk->dev_opaque = NULL;
    blk->guest_block_size = 512;
    blk_unref(blk);
}

/*
 * Return the device model attached to @blk if any, else null.
 */
void *blk_get_attached_dev(BlockBackend *blk)
/* TODO change to return DeviceState * when all users are qdevified */
{
    return blk->dev;
}

/*
 * Set @blk's device model callbacks to @ops.
 * @opaque is the opaque argument to pass to the callbacks.
 * This is for use by device models.
 */
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                     void *opaque)
{
    blk->dev_ops = ops;
    blk->dev_opaque = opaque;
}
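
/*
 * Illustrative sketch (callback names are hypothetical): a device model
 * typically defines a static BlockDevOps table and registers it once at
 * realize time:
 *
 *     static const BlockDevOps my_dev_block_ops = {
 *         .change_media_cb = my_dev_change_media_cb,
 *         .is_tray_open    = my_dev_is_tray_open,
 *         .resize_cb       = my_dev_resize_cb,
 *     };
 *
 *     blk_set_dev_ops(blk, &my_dev_block_ops, my_dev_state);
 */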

/*
 * Notify @blk's attached device model of media change.
 * If @load is true, notify of media load.
 * Else, notify of media eject.
 * Also send DEVICE_TRAY_MOVED events as appropriate.
 */
void blk_dev_change_media_cb(BlockBackend *blk, bool load)
{
    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
        bool tray_was_open, tray_is_open;

        tray_was_open = blk_dev_is_tray_open(blk);
        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
        tray_is_open = blk_dev_is_tray_open(blk);

        if (tray_was_open != tray_is_open) {
            qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open,
                                              &error_abort);
        }
    }
}

static void blk_root_change_media(BdrvChild *child, bool load)
{
    blk_dev_change_media_cb(child->opaque, load);
}

/*
 * Does @blk's attached device model have removable media?
 * %true if no device model is attached.
 */
bool blk_dev_has_removable_media(BlockBackend *blk)
{
    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
}

/*
 * Does @blk's attached device model have a tray?
 */
bool blk_dev_has_tray(BlockBackend *blk)
{
    return blk->dev_ops && blk->dev_ops->is_tray_open;
}

/*
 * Notify @blk's attached device model of a media eject request.
 * If @force is true, the medium is about to be yanked out forcefully.
 */
void blk_dev_eject_request(BlockBackend *blk, bool force)
{
    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
    }
}

/*
 * Does @blk's attached device model have a tray, and is it open?
 */
bool blk_dev_is_tray_open(BlockBackend *blk)
{
    if (blk_dev_has_tray(blk)) {
        return blk->dev_ops->is_tray_open(blk->dev_opaque);
    }
    return false;
}

/*
 * Does @blk's attached device model have the medium locked?
 * %false if the device model has no such lock.
 */
bool blk_dev_is_medium_locked(BlockBackend *blk)
{
    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
    }
    return false;
}

/*
 * Notify @blk's attached device model of a backend size change.
 */
static void blk_root_resize(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    if (blk->dev_ops && blk->dev_ops->resize_cb) {
        blk->dev_ops->resize_cb(blk->dev_opaque);
    }
}

void blk_iostatus_enable(BlockBackend *blk)
{
    blk->iostatus_enabled = true;
    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
    return (blk->iostatus_enabled &&
           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}

BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
    return blk->iostatus;
}

void blk_iostatus_disable(BlockBackend *blk)
{
    blk->iostatus_enabled = false;
}

void blk_iostatus_reset(BlockBackend *blk)
{
    if (blk_iostatus_is_enabled(blk)) {
        BlockDriverState *bs = blk_bs(blk);
        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs && bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}

void blk_iostatus_set_err(BlockBackend *blk, int error)
{
    assert(blk_iostatus_is_enabled(blk));
    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
{
    blk->allow_write_beyond_eof = allow;
}

static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
                                  size_t size)
{
    int64_t len;

    if (size > INT_MAX) {
        return -EIO;
    }

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    if (!blk->allow_write_beyond_eof) {
        len = blk_getlength(blk);
        if (len < 0) {
            return len;
        }

        if (offset > len || len - offset < size) {
            return -EIO;
        }
    }

    return 0;
}

static int blk_check_request(BlockBackend *blk, int64_t sector_num,
                             int nb_sectors)
{
    if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
        return -EIO;
    }

    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
        return -EIO;
    }

    return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
                                  nb_sectors * BDRV_SECTOR_SIZE);
}

int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                               unsigned int bytes, QEMUIOVector *qiov,
                               BdrvRequestFlags flags)
{
    int ret;

    trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, false);
    }

    return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
}

int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                unsigned int bytes, QEMUIOVector *qiov,
                                BdrvRequestFlags flags)
{
    int ret;

    trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, true);
    }

    if (!blk->enable_write_cache) {
        flags |= BDRV_REQ_FUA;
    }

    return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
}

typedef struct BlkRwCo {
    BlockBackend *blk;
    int64_t offset;
    QEMUIOVector *qiov;
    int ret;
    BdrvRequestFlags flags;
} BlkRwCo;

static void blk_read_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;

    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
                              rwco->qiov, rwco->flags);
}

static void blk_write_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;

    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
                               rwco->qiov, rwco->flags);
}

static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
                   int64_t bytes, CoroutineEntry co_entry,
                   BdrvRequestFlags flags)
{
    AioContext *aio_context;
    QEMUIOVector qiov;
    struct iovec iov;
    Coroutine *co;
    BlkRwCo rwco;

    iov = (struct iovec) {
        .iov_base = buf,
        .iov_len = bytes,
    };
    qemu_iovec_init_external(&qiov, &iov, 1);

    rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .qiov   = &qiov,
        .flags  = flags,
        .ret    = NOT_DONE,
    };

    co = qemu_coroutine_create(co_entry, &rwco);
    qemu_coroutine_enter(co);

    aio_context = blk_get_aio_context(blk);
    while (rwco.ret == NOT_DONE) {
        aio_poll(aio_context, true);
    }

    return rwco.ret;
}

int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
                          int count)
{
    int ret;

    ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

    blk_root_drained_begin(blk->root);
    ret = blk_pread(blk, offset, buf, count);
    blk_root_drained_end(blk->root);
    return ret;
}

int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                      int count, BdrvRequestFlags flags)
{
    return blk_prw(blk, offset, NULL, count, blk_write_entry,
                   flags | BDRV_REQ_ZERO_WRITE);
}

int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
    return bdrv_make_zero(blk->root, flags);
}

static void error_callback_bh(void *opaque)
{
    struct BlockBackendAIOCB *acb = opaque;
    qemu_bh_delete(acb->bh);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_unref(acb);
}

BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
                                  BlockCompletionFunc *cb,
                                  void *opaque, int ret)
{
    struct BlockBackendAIOCB *acb;
    QEMUBH *bh;

    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
    acb->blk = blk;
    acb->ret = ret;

    bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
    acb->bh = bh;
    qemu_bh_schedule(bh);

    return &acb->common;
}

typedef struct BlkAioEmAIOCB {
    BlockAIOCB common;
    BlkRwCo rwco;
    int bytes;
    bool has_returned;
    QEMUBH *bh;
} BlkAioEmAIOCB;

static const AIOCBInfo blk_aio_em_aiocb_info = {
    .aiocb_size         = sizeof(BlkAioEmAIOCB),
};

static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
    if (acb->bh) {
        assert(acb->has_returned);
        qemu_bh_delete(acb->bh);
    }
    if (acb->has_returned) {
        acb->common.cb(acb->common.opaque, acb->rwco.ret);
        qemu_aio_unref(acb);
    }
}

static void blk_aio_complete_bh(void *opaque)
{
    blk_aio_complete(opaque);
}

static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
                                QEMUIOVector *qiov, CoroutineEntry co_entry,
                                BdrvRequestFlags flags,
                                BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;

    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .qiov   = qiov,
        .flags  = flags,
        .ret    = NOT_DONE,
    };
    acb->bytes = bytes;
    acb->bh = NULL;
    acb->has_returned = false;

    co = qemu_coroutine_create(co_entry, acb);
    qemu_coroutine_enter(co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb);
        qemu_bh_schedule(acb->bh);
    }

    return &acb->common;
}

static void blk_aio_read_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    assert(rwco->qiov->size == acb->bytes);
    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
                              rwco->qiov, rwco->flags);
    blk_aio_complete(acb);
}

static void blk_aio_write_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
                               rwco->qiov, rwco->flags);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                                  int count, BdrvRequestFlags flags,
                                  BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
                        flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}

int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
{
    int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
    if (ret < 0) {
        return ret;
    }
    return count;
}

int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
               BdrvRequestFlags flags)
{
    int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
                      flags);
    if (ret < 0) {
        return ret;
    }
    return count;
}
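
/*
 * Illustrative sketch: the synchronous wrappers return the byte count on
 * success and a negative errno on failure, so a caller might check them
 * like
 *
 *     uint8_t buf[512];
 *
 *     if (blk_pread(blk, 0, buf, sizeof(buf)) < 0) {
 *         ...handle read error...
 *     }
 *     if (blk_pwrite(blk, 0, buf, sizeof(buf), 0) < 0) {
 *         ...handle write error...
 *     }
 */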

int64_t blk_getlength(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_getlength(blk_bs(blk));
}

void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
    if (!blk_bs(blk)) {
        *nb_sectors_ptr = 0;
    } else {
        bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
    }
}

int64_t blk_nb_sectors(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_nb_sectors(blk_bs(blk));
}

BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
                           QEMUIOVector *qiov, BdrvRequestFlags flags,
                           BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_read_entry, flags, cb, opaque);
}

BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
                            QEMUIOVector *qiov, BdrvRequestFlags flags,
                            BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_write_entry, flags, cb, opaque);
}
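
/*
 * Illustrative sketch (my_read_cb and its wiring are hypothetical): the
 * asynchronous variants report completion through the callback:
 *
 *     static void my_read_cb(void *opaque, int ret)
 *     {
 *         if (ret < 0) {
 *             ...handle error...
 *         }
 *     }
 *
 *     qemu_iovec_init_external(&qiov, &iov, 1);
 *     blk_aio_preadv(blk, 0, &qiov, 0, my_read_cb, my_opaque);
 */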

BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                          BlockCompletionFunc *cb, void *opaque)
{
    if (!blk_is_available(blk)) {
        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
    }

    return bdrv_aio_flush(blk_bs(blk), cb, opaque);
}

BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
                             int64_t offset, int count,
                             BlockCompletionFunc *cb, void *opaque)
{
    int ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return blk_abort_aio_request(blk, cb, opaque, ret);
    }

    return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque);
}

void blk_aio_cancel(BlockAIOCB *acb)
{
    bdrv_aio_cancel(acb);
}

void blk_aio_cancel_async(BlockAIOCB *acb)
{
    bdrv_aio_cancel_async(acb);
}

int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_ioctl(blk_bs(blk), req, buf);
}

BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                          BlockCompletionFunc *cb, void *opaque)
{
    if (!blk_is_available(blk)) {
        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
    }

    return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
}

int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
    int ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

    return bdrv_co_pdiscard(blk_bs(blk), offset, count);
}

int blk_co_flush(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_co_flush(blk_bs(blk));
}

int blk_flush(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_flush(blk_bs(blk));
}

void blk_drain(BlockBackend *blk)
{
    if (blk_bs(blk)) {
        bdrv_drain(blk_bs(blk));
    }
}

void blk_drain_all(void)
{
    bdrv_drain_all();
}

void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                      BlockdevOnError on_write_error)
{
    blk->on_read_error = on_read_error;
    blk->on_write_error = on_write_error;
}

BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
    return is_read ? blk->on_read_error : blk->on_write_error;
}

BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                      int error)
{
    BlockdevOnError on_err = blk_get_on_error(blk, is_read);

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
        return (error == ENOSPC) ?
               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_STOP:
        return BLOCK_ERROR_ACTION_STOP;
    case BLOCKDEV_ON_ERROR_REPORT:
        return BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_IGNORE:
        return BLOCK_ERROR_ACTION_IGNORE;
    case BLOCKDEV_ON_ERROR_AUTO:
    default:
        abort();
    }
}
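
/*
 * Illustrative sketch: a device model with a failed request (negative
 * errno in @ret) typically maps the error to an action and then reports
 * it, e.g.
 *
 *     BlockErrorAction action = blk_get_error_action(blk, is_read, -ret);
 *
 *     if (action == BLOCK_ERROR_ACTION_STOP) {
 *         ...queue the request for retry after "cont"...
 *     }
 *     blk_error_action(blk, action, is_read, -ret);
 */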

static void send_qmp_error_event(BlockBackend *blk,
                                 BlockErrorAction action,
                                 bool is_read, int error)
{
    IoOperationType optype;

    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
    qapi_event_send_block_io_error(blk_name(blk), optype, action,
                                   blk_iostatus_is_enabled(blk),
                                   error == ENOSPC, strerror(error),
                                   &error_abort);
}

/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 */
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
                      bool is_read, int error)
{
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        blk_iostatus_set_err(blk, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(blk, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(blk, action, is_read, error);
    }
}

int blk_is_read_only(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_is_read_only(bs);
    } else {
        return blk->root_state.read_only;
    }
}

int blk_is_sg(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        return 0;
    }

    return bdrv_is_sg(bs);
}

int blk_enable_write_cache(BlockBackend *blk)
{
    return blk->enable_write_cache;
}

void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
    blk->enable_write_cache = wce;
}

void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        error_setg(errp, "Device '%s' has no medium", blk->name);
        return;
    }

    bdrv_invalidate_cache(bs, errp);
}

bool blk_is_inserted(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    return bs && bdrv_is_inserted(bs);
}

bool blk_is_available(BlockBackend *blk)
{
    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}

void blk_lock_medium(BlockBackend *blk, bool locked)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_lock_medium(bs, locked);
    }
}

void blk_eject(BlockBackend *blk, bool eject_flag)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_eject(bs, eject_flag);
    }
}

int blk_get_flags(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_get_flags(bs);
    } else {
        return blk->root_state.open_flags;
    }
}

/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
uint32_t blk_get_max_transfer(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    uint32_t max = 0;

    if (bs) {
        max = bs->bl.max_transfer;
    }
    return MIN_NON_ZERO(max, INT_MAX);
}

int blk_get_max_iov(BlockBackend *blk)
{
    return blk->root->bs->bl.max_iov;
}

void blk_set_guest_block_size(BlockBackend *blk, int align)
{
    blk->guest_block_size = align;
}

void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
    return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}

void *blk_blockalign(BlockBackend *blk, size_t size)
{
    return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}

bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        return false;
    }

    return bdrv_op_is_blocked(bs, op, errp);
}

void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_unblock(bs, op, reason);
    }
}

void blk_op_block_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_block_all(bs, reason);
    }
}

void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_unblock_all(bs, reason);
    }
}

AioContext *blk_get_aio_context(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_get_aio_context(bs);
    } else {
        return qemu_get_aio_context();
    }
}

static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
{
    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
    return blk_get_aio_context(blk_acb->blk);
}

void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        if (blk->public.throttle_state) {
            throttle_timers_detach_aio_context(&blk->public.throttle_timers);
        }
        bdrv_set_aio_context(bs, new_context);
        if (blk->public.throttle_state) {
            throttle_timers_attach_aio_context(&blk->public.throttle_timers,
                                               new_context);
        }
    }
}
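
/*
 * Illustrative sketch: a device that performs its I/O in an IOThread
 * (e.g. dataplane) would move the backend over from the main loop with
 * something like
 *
 *     blk_set_aio_context(blk, iothread_get_aio_context(iothread));
 */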

void blk_add_aio_context_notifier(BlockBackend *blk,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_add_aio_context_notifier(bs, attached_aio_context,
                                      detach_aio_context, opaque);
    }
}

void blk_remove_aio_context_notifier(BlockBackend *blk,
                                     void (*attached_aio_context)(AioContext *,
                                                                  void *),
                                     void (*detach_aio_context)(void *),
                                     void *opaque)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_remove_aio_context_notifier(bs, attached_aio_context,
                                         detach_aio_context, opaque);
    }
}

void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->remove_bs_notifiers, notify);
}

void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->insert_bs_notifiers, notify);
}

void blk_io_plug(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_io_plug(bs);
    }
}

void blk_io_unplug(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_io_unplug(bs);
    }
}

BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
    return &blk->stats;
}

void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                  BlockCompletionFunc *cb, void *opaque)
{
    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}

int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                                      int count, BdrvRequestFlags flags)
{
    return blk_co_pwritev(blk, offset, count, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
}

int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors)
{
    int ret = blk_check_request(blk, sector_num, nb_sectors);
    if (ret < 0) {
        return ret;
    }

    return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
}

int blk_truncate(BlockBackend *blk, int64_t offset)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_truncate(blk_bs(blk), offset);
}

int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
    int ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

    return bdrv_pdiscard(blk_bs(blk), offset, count);
}

int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                     int64_t pos, int size)
{
    int ret;

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
    if (ret < 0) {
        return ret;
    }

    if (ret == size && !blk->enable_write_cache) {
        ret = bdrv_flush(blk_bs(blk));
    }

    return ret < 0 ? ret : size;
}

int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
}

int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_blocksizes(blk_bs(blk), bsz);
}

int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_geometry(blk_bs(blk), geo);
}

/*
 * Updates the BlockBackendRootState object with data from the currently
 * attached BlockDriverState.
 */
void blk_update_root_state(BlockBackend *blk)
{
    assert(blk->root);

    blk->root_state.open_flags    = blk->root->bs->open_flags;
    blk->root_state.read_only     = blk->root->bs->read_only;
    blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
}

/*
 * Applies the information in the root state to the given BlockDriverState.
 * This does not include the flags that have to be specified for bdrv_open();
 * use blk_get_open_flags_from_root_state() to obtain them.
 */
void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
{
    bs->detect_zeroes = blk->root_state.detect_zeroes;
}

/*
 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
 * supposed to inherit the root state.
 */
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
    int bs_flags;

    bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
    bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;

    return bs_flags;
}

BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
    return &blk->root_state;
}

int blk_commit_all(void)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *aio_context = blk_get_aio_context(blk);

        aio_context_acquire(aio_context);
        if (blk_is_inserted(blk) && blk->root->bs->backing) {
            int ret = bdrv_commit(blk->root->bs);
            if (ret < 0) {
                aio_context_release(aio_context);
                return ret;
            }
        }
        aio_context_release(aio_context);
    }
    return 0;
}

int blk_flush_all(void)
{
    BlockBackend *blk = NULL;
    int result = 0;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *aio_context = blk_get_aio_context(blk);
        int ret;

        aio_context_acquire(aio_context);
        if (blk_is_inserted(blk)) {
            ret = blk_flush(blk);
            if (ret < 0 && !result) {
                result = ret;
            }
        }
        aio_context_release(aio_context);
    }

    return result;
}

/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
    throttle_group_config(blk, cfg);
}

void blk_io_limits_disable(BlockBackend *blk)
{
    assert(blk->public.throttle_state);
    bdrv_drained_begin(blk_bs(blk));
    throttle_group_unregister_blk(blk);
    bdrv_drained_end(blk_bs(blk));
}

/* should be called before blk_set_io_limits if a limit is set */
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
    assert(!blk->public.throttle_state);
    throttle_group_register_blk(blk, group);
}

void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
    /* this BB is not part of any group */
    if (!blk->public.throttle_state) {
        return;
    }

    /* this BB is already part of the group we want */
    if (!g_strcmp0(throttle_group_get_name(blk), group)) {
        return;
    }

    /* need to change the group this BB belongs to */
    blk_io_limits_disable(blk);
    blk_io_limits_enable(blk, group);
}
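
/*
 * Illustrative sketch (group name and limit values are arbitrary):
 * enabling throttling means joining a group first and then applying a
 * configuration:
 *
 *     ThrottleConfig cfg;
 *
 *     throttle_config_init(&cfg);
 *     cfg.buckets[THROTTLE_BPS_TOTAL].avg = 1024 * 1024;
 *     blk_io_limits_enable(blk, "group0");
 *     blk_set_io_limits(blk, &cfg);
 */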

static void blk_root_drained_begin(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    /* Note that blk->root may not be accessible here yet if we are just
     * attaching to a BlockDriverState that is drained. Use child instead. */

    if (blk->public.io_limits_disabled++ == 0) {
        throttle_group_restart_blk(blk);
    }
}

static void blk_root_drained_end(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    assert(blk->public.io_limits_disabled);
    --blk->public.io_limits_disabled;
}