qemu/block/block-backend.c
/*
 * QEMU Block backends
 *
 * Copyright (C) 2014-2016 Red Hat, Inc.
 *
 * Authors:
 *  Markus Armbruster <armbru@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1
 * or later.  See the COPYING.LIB file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/throttle-groups.h"
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
#include "qapi-event.h"
#include "qemu/id.h"
#include "trace.h"

/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);

struct BlockBackend {
    char *name;
    int refcnt;
    BdrvChild *root;
    DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
    QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
    QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
    BlockBackendPublic public;

    void *dev;                  /* attached device model, if any */
    bool legacy_dev;            /* true if dev is not a DeviceState */
    /* TODO change to DeviceState when all users are qdevified */
    const BlockDevOps *dev_ops;
    void *dev_opaque;

    /* the block size for which the guest device expects atomicity */
    int guest_block_size;

    /* If the BDS tree is removed, some of its options are stored here (which
     * can be used to restore those options in the new BDS on insert) */
    BlockBackendRootState root_state;

    bool enable_write_cache;

    /* I/O stats (display with "info blockstats"). */
    BlockAcctStats stats;

    BlockdevOnError on_read_error, on_write_error;
    bool iostatus_enabled;
    BlockDeviceIoStatus iostatus;

    bool allow_write_beyond_eof;

    NotifierList remove_bs_notifiers, insert_bs_notifiers;
};

typedef struct BlockBackendAIOCB {
    BlockAIOCB common;
    BlockBackend *blk;
    int ret;
} BlockBackendAIOCB;

static const AIOCBInfo block_backend_aiocb_info = {
    .get_aio_context = blk_aiocb_get_aio_context,
    .aiocb_size = sizeof(BlockBackendAIOCB),
};

static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);

/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
    QTAILQ_HEAD_INITIALIZER(block_backends);

/* All BlockBackends referenced by the monitor; these are the ones that
 * blk_next() iterates over */
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
    QTAILQ_HEAD_INITIALIZER(monitor_block_backends);

static void blk_root_inherit_options(int *child_flags, QDict *child_options,
                                     int parent_flags, QDict *parent_options)
{
    /* We're not supposed to call this function for root nodes */
    abort();
}

static void blk_root_drained_begin(BdrvChild *child);
static void blk_root_drained_end(BdrvChild *child);

static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);

static const char *blk_root_get_name(BdrvChild *child)
{
    return blk_name(child->opaque);
}

static const BdrvChildRole child_root = {
    .inherit_options    = blk_root_inherit_options,

    .change_media       = blk_root_change_media,
    .resize             = blk_root_resize,
    .get_name           = blk_root_get_name,

    .drained_begin      = blk_root_drained_begin,
    .drained_end        = blk_root_drained_end,
};

/*
 * Create a new BlockBackend with a reference count of one.
 * The new BlockBackend has no name and is not referenced by the monitor;
 * use monitor_add_blk() to name it.  This function cannot fail.
 */
BlockBackend *blk_new(void)
{
    BlockBackend *blk;

    blk = g_new0(BlockBackend, 1);
    blk->refcnt = 1;
    blk_set_enable_write_cache(blk, true);

    qemu_co_queue_init(&blk->public.throttled_reqs[0]);
    qemu_co_queue_init(&blk->public.throttled_reqs[1]);

    notifier_list_init(&blk->remove_bs_notifiers);
    notifier_list_init(&blk->insert_bs_notifiers);

    QTAILQ_INSERT_TAIL(&block_backends, blk, link);
    return blk;
}

/*
 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
 *
 * Just as with bdrv_open(), after having called this function the reference to
 * @options belongs to the block layer (even on failure).
 *
 * TODO: Remove @filename and @flags; it should be possible to specify a whole
 * BDS tree just by specifying the @options QDict (or @reference,
 * alternatively). At the time of adding this function, this is not possible,
 * though, so callers of this function have to be able to specify @filename and
 * @flags.
 */
BlockBackend *blk_new_open(const char *filename, const char *reference,
                           QDict *options, int flags, Error **errp)
{
    BlockBackend *blk;
    BlockDriverState *bs;

    blk = blk_new();
    bs = bdrv_open(filename, reference, options, flags, errp);
    if (!bs) {
        blk_unref(blk);
        return NULL;
    }

    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);

    return blk;
}
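
/*
 * A minimal caller sketch (hypothetical, error handling shortened); the
 * filename and flags are illustrative only:
 *
 *     Error *local_err = NULL;
 *     BlockBackend *blk;
 *
 *     blk = blk_new_open("disk.qcow2", NULL, NULL, BDRV_O_RDWR, &local_err);
 *     if (!blk) {
 *         error_report_err(local_err);    // open failed; blk was unref'ed
 *     }
 */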

static void blk_delete(BlockBackend *blk)
{
    assert(!blk->refcnt);
    assert(!blk->name);
    assert(!blk->dev);
    if (blk->root) {
        blk_remove_bs(blk);
    }
    assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
    assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
    QTAILQ_REMOVE(&block_backends, blk, link);
    drive_info_del(blk->legacy_dinfo);
    block_acct_cleanup(&blk->stats);
    g_free(blk);
}

static void drive_info_del(DriveInfo *dinfo)
{
    if (!dinfo) {
        return;
    }
    qemu_opts_del(dinfo->opts);
    g_free(dinfo->serial);
    g_free(dinfo);
}

int blk_get_refcnt(BlockBackend *blk)
{
    return blk ? blk->refcnt : 0;
}

/*
 * Increment @blk's reference count.
 * @blk must not be null.
 */
void blk_ref(BlockBackend *blk)
{
    blk->refcnt++;
}

/*
 * Decrement @blk's reference count.
 * If this drops it to zero, destroy @blk.
 * For convenience, do nothing if @blk is null.
 */
void blk_unref(BlockBackend *blk)
{
    if (blk) {
        assert(blk->refcnt > 0);
        if (!--blk->refcnt) {
            blk_delete(blk);
        }
    }
}
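
/*
 * Sketch of the usual ownership pattern (illustrative only): any component
 * that stores a BlockBackend pointer takes its own reference and drops it
 * when done:
 *
 *     blk_ref(blk);      // keep blk alive while we hold the pointer
 *     ...
 *     blk_unref(blk);    // destroys blk if this was the last reference
 */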

/*
 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
 * ones which are hidden (i.e. are not referenced by the monitor).
 */
static BlockBackend *blk_all_next(BlockBackend *blk)
{
    return blk ? QTAILQ_NEXT(blk, link)
               : QTAILQ_FIRST(&block_backends);
}

void blk_remove_all_bs(void)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *ctx = blk_get_aio_context(blk);

        aio_context_acquire(ctx);
        if (blk->root) {
            blk_remove_bs(blk);
        }
        aio_context_release(ctx);
    }
}

/*
 * Return the monitor-owned BlockBackend after @blk.
 * If @blk is null, return the first one.
 * Else, return @blk's next sibling, which may be null.
 *
 * To iterate over all BlockBackends, do
 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 *     ...
 * }
 */
BlockBackend *blk_next(BlockBackend *blk)
{
    return blk ? QTAILQ_NEXT(blk, monitor_link)
               : QTAILQ_FIRST(&monitor_block_backends);
}

/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
 * the monitor or attached to a BlockBackend */
BlockDriverState *bdrv_next(BdrvNextIterator *it)
{
    BlockDriverState *bs;

    /* First, return all root nodes of BlockBackends. In order to avoid
     * returning a BDS twice when multiple BBs refer to it, we only return it
     * if the BB is the first one in the parent list of the BDS. */
    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
        do {
            it->blk = blk_all_next(it->blk);
            bs = it->blk ? blk_bs(it->blk) : NULL;
        } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));

        if (bs) {
            return bs;
        }
        it->phase = BDRV_NEXT_MONITOR_OWNED;
    }

    /* Then return the monitor-owned BDSes without a BB attached. Ignore all
     * BDSes that are attached to a BlockBackend here; they have been handled
     * by the above block already */
    do {
        it->bs = bdrv_next_monitor_owned(it->bs);
        bs = it->bs;
    } while (bs && bdrv_has_blk(bs));

    return bs;
}

BlockDriverState *bdrv_first(BdrvNextIterator *it)
{
    *it = (BdrvNextIterator) {
        .phase = BDRV_NEXT_BACKEND_ROOTS,
    };

    return bdrv_next(it);
}
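
/*
 * Hypothetical caller sketch of the iterator pair above; this is the
 * pattern for visiting every top-level BDS exactly once:
 *
 *     BdrvNextIterator it;
 *     BlockDriverState *bs;
 *
 *     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
 *         ... use bs ...
 *     }
 */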

/*
 * Add a BlockBackend into the list of backends referenced by the monitor, with
 * the given @name acting as the handle for the monitor.
 * Strictly for use by blockdev.c.
 *
 * @name must not be null or empty.
 *
 * Returns true on success and false on failure. In the latter case, an Error
 * object is returned through @errp.
 */
bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
{
    assert(!blk->name);
    assert(name && name[0]);

    if (!id_wellformed(name)) {
        error_setg(errp, "Invalid device name");
        return false;
    }
    if (blk_by_name(name)) {
        error_setg(errp, "Device with id '%s' already exists", name);
        return false;
    }
    if (bdrv_find_node(name)) {
        error_setg(errp,
                   "Device name '%s' conflicts with an existing node name",
                   name);
        return false;
    }

    blk->name = g_strdup(name);
    QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
    return true;
}

/*
 * Remove a BlockBackend from the list of backends referenced by the monitor.
 * Strictly for use by blockdev.c.
 */
void monitor_remove_blk(BlockBackend *blk)
{
    if (!blk->name) {
        return;
    }

    QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
    g_free(blk->name);
    blk->name = NULL;
}

/*
 * Return @blk's name, a non-null string.
 * Returns an empty string iff @blk is not referenced by the monitor.
 */
const char *blk_name(BlockBackend *blk)
{
    return blk->name ?: "";
}

/*
 * Return the BlockBackend with name @name if it exists, else null.
 * @name must not be null.
 */
BlockBackend *blk_by_name(const char *name)
{
    BlockBackend *blk = NULL;

    assert(name);
    while ((blk = blk_next(blk)) != NULL) {
        if (!strcmp(name, blk->name)) {
            return blk;
        }
    }
    return NULL;
}

/*
 * Return the BlockDriverState attached to @blk if any, else null.
 */
BlockDriverState *blk_bs(BlockBackend *blk)
{
    return blk->root ? blk->root->bs : NULL;
}

static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
{
    BdrvChild *child;
    QLIST_FOREACH(child, &bs->parents, next_parent) {
        if (child->role == &child_root) {
            return child->opaque;
        }
    }

    return NULL;
}

/*
 * Returns true if @bs has an associated BlockBackend.
 */
bool bdrv_has_blk(BlockDriverState *bs)
{
    return bdrv_first_blk(bs) != NULL;
}

/*
 * Returns true if @bs has only BlockBackends as parents.
 */
bool bdrv_is_root_node(BlockDriverState *bs)
{
    BdrvChild *c;

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c->role != &child_root) {
            return false;
        }
    }

    return true;
}

/*
 * Return @blk's DriveInfo if any, else null.
 */
DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
{
    return blk->legacy_dinfo;
}

/*
 * Set @blk's DriveInfo to @dinfo, and return it.
 * @blk must not have a DriveInfo set already.
 * No other BlockBackend may have the same DriveInfo set.
 */
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
{
    assert(!blk->legacy_dinfo);
    return blk->legacy_dinfo = dinfo;
}

/*
 * Return the BlockBackend with DriveInfo @dinfo.
 * It must exist.
 */
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_next(blk)) != NULL) {
        if (blk->legacy_dinfo == dinfo) {
            return blk;
        }
    }
    abort();
}

/*
 * Returns a pointer to the publicly accessible fields of @blk.
 */
BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
    return &blk->public;
}

/*
 * Returns a BlockBackend given the associated @public fields.
 */
BlockBackend *blk_by_public(BlockBackendPublic *public)
{
    return container_of(public, BlockBackend, public);
}

/*
 * Disassociates the currently associated BlockDriverState from @blk.
 */
void blk_remove_bs(BlockBackend *blk)
{
    notifier_list_notify(&blk->remove_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_detach_aio_context(&blk->public.throttle_timers);
    }

    blk_update_root_state(blk);

    bdrv_root_unref_child(blk->root);
    blk->root = NULL;
}

/*
 * Associates a new BlockDriverState with @blk.
 */
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
{
    bdrv_ref(bs);
    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);

    notifier_list_notify(&blk->insert_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_attach_aio_context(
            &blk->public.throttle_timers, bdrv_get_aio_context(bs));
    }
}
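
/*
 * Together with blk_remove_bs(), this allows swapping the medium under a
 * backend while the device model stays attached.  Rough sketch (assumes
 * @new_bs is an already opened BDS):
 *
 *     blk_remove_bs(blk);           // fires remove_bs_notifiers
 *     blk_insert_bs(blk, new_bs);   // takes a reference, fires
 *                                   // insert_bs_notifiers
 */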

static int blk_do_attach_dev(BlockBackend *blk, void *dev)
{
    if (blk->dev) {
        return -EBUSY;
    }
    blk_ref(blk);
    blk->dev = dev;
    blk->legacy_dev = false;
    blk_iostatus_reset(blk);
    return 0;
}

/*
 * Attach device model @dev to @blk.
 * Return 0 on success, -EBUSY when a device model is attached already.
 */
int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
{
    return blk_do_attach_dev(blk, dev);
}

/*
 * Attach device model @dev to @blk.
 * @blk must not have a device model attached already.
 * TODO qdevified devices don't use this, remove when devices are qdevified
 */
void blk_attach_dev_legacy(BlockBackend *blk, void *dev)
{
    if (blk_do_attach_dev(blk, dev) < 0) {
        abort();
    }
    blk->legacy_dev = true;
}

/*
 * Detach device model @dev from @blk.
 * @dev must be currently attached to @blk.
 */
void blk_detach_dev(BlockBackend *blk, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(blk->dev == dev);
    blk->dev = NULL;
    blk->dev_ops = NULL;
    blk->dev_opaque = NULL;
    blk->guest_block_size = 512;
    blk_unref(blk);
}

/*
 * Return the device model attached to @blk if any, else null.
 */
void *blk_get_attached_dev(BlockBackend *blk)
/* TODO change to return DeviceState * when all users are qdevified */
{
    return blk->dev;
}

/* Return the qdev ID, or if no ID is assigned the QOM path, of the block
 * device attached to the BlockBackend. */
static char *blk_get_attached_dev_id(BlockBackend *blk)
{
    DeviceState *dev;

    assert(!blk->legacy_dev);
    dev = blk->dev;

    if (!dev) {
        return g_strdup("");
    } else if (dev->id) {
        return g_strdup(dev->id);
    }
    return object_get_canonical_path(OBJECT(dev));
}

/*
 * Return the BlockBackend which has the device model @dev attached if it
 * exists, else null.
 *
 * @dev must not be null.
 */
BlockBackend *blk_by_dev(void *dev)
{
    BlockBackend *blk = NULL;

    assert(dev != NULL);
    while ((blk = blk_all_next(blk)) != NULL) {
        if (blk->dev == dev) {
            return blk;
        }
    }
    return NULL;
}

/*
 * Set @blk's device model callbacks to @ops.
 * @opaque is the opaque argument to pass to the callbacks.
 * This is for use by device models.
 */
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                     void *opaque)
{
    /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
     * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
     * is set. */
    assert(!blk->legacy_dev);

    blk->dev_ops = ops;
    blk->dev_opaque = opaque;
}
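
/*
 * Hedged sketch of how a device model typically wires itself up (the
 * callback names and @dev are hypothetical):
 *
 *     static const BlockDevOps my_block_ops = {
 *         .change_media_cb = my_change_media_cb,
 *         .resize_cb       = my_resize_cb,
 *     };
 *
 *     blk_attach_dev(blk, DEVICE(dev));
 *     blk_set_dev_ops(blk, &my_block_ops, dev);
 */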

/*
 * Notify @blk's attached device model of media change.
 * If @load is true, notify of media load.
 * Else, notify of media eject.
 * Also send DEVICE_TRAY_MOVED events as appropriate.
 */
void blk_dev_change_media_cb(BlockBackend *blk, bool load)
{
    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
        bool tray_was_open, tray_is_open;

        assert(!blk->legacy_dev);

        tray_was_open = blk_dev_is_tray_open(blk);
        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
        tray_is_open = blk_dev_is_tray_open(blk);

        if (tray_was_open != tray_is_open) {
            char *id = blk_get_attached_dev_id(blk);
            qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open,
                                              &error_abort);
            g_free(id);
        }
    }
}

static void blk_root_change_media(BdrvChild *child, bool load)
{
    blk_dev_change_media_cb(child->opaque, load);
}

/*
 * Does @blk's attached device model have removable media?
 * %true if no device model is attached.
 */
bool blk_dev_has_removable_media(BlockBackend *blk)
{
    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
}

/*
 * Does @blk's attached device model have a tray?
 */
bool blk_dev_has_tray(BlockBackend *blk)
{
    return blk->dev_ops && blk->dev_ops->is_tray_open;
}

/*
 * Notify @blk's attached device model of a media eject request.
 * If @force is true, the medium is about to be yanked out forcefully.
 */
void blk_dev_eject_request(BlockBackend *blk, bool force)
{
    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
    }
}

/*
 * Does @blk's attached device model have a tray, and is it open?
 */
bool blk_dev_is_tray_open(BlockBackend *blk)
{
    if (blk_dev_has_tray(blk)) {
        return blk->dev_ops->is_tray_open(blk->dev_opaque);
    }
    return false;
}

/*
 * Does @blk's attached device model have the medium locked?
 * %false if the device model has no such lock.
 */
bool blk_dev_is_medium_locked(BlockBackend *blk)
{
    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
    }
    return false;
}

/*
 * Notify @blk's attached device model of a backend size change.
 */
static void blk_root_resize(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    if (blk->dev_ops && blk->dev_ops->resize_cb) {
        blk->dev_ops->resize_cb(blk->dev_opaque);
    }
}

void blk_iostatus_enable(BlockBackend *blk)
{
    blk->iostatus_enabled = true;
    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
    return (blk->iostatus_enabled &&
           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}

BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
    return blk->iostatus;
}

void blk_iostatus_disable(BlockBackend *blk)
{
    blk->iostatus_enabled = false;
}

void blk_iostatus_reset(BlockBackend *blk)
{
    if (blk_iostatus_is_enabled(blk)) {
        BlockDriverState *bs = blk_bs(blk);
        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs && bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}

void blk_iostatus_set_err(BlockBackend *blk, int error)
{
    assert(blk_iostatus_is_enabled(blk));
    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
{
    blk->allow_write_beyond_eof = allow;
}

static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
                                  size_t size)
{
    int64_t len;

    if (size > INT_MAX) {
        return -EIO;
    }

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    if (!blk->allow_write_beyond_eof) {
        len = blk_getlength(blk);
        if (len < 0) {
            return len;
        }

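        /* The comparison below is ordered so that len - offset is only
         * computed once offset <= len is known, keeping the subtraction
         * non-negative; size itself was bounded by INT_MAX above. */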
        if (offset > len || len - offset < size) {
            return -EIO;
        }
    }

    return 0;
}

int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                               unsigned int bytes, QEMUIOVector *qiov,
                               BdrvRequestFlags flags)
{
    int ret;
    BlockDriverState *bs = blk_bs(blk);

    trace_blk_co_preadv(blk, bs, offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);

    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, false);
    }

    ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
    bdrv_dec_in_flight(bs);
    return ret;
}

int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                unsigned int bytes, QEMUIOVector *qiov,
                                BdrvRequestFlags flags)
{
    int ret;
    BlockDriverState *bs = blk_bs(blk);

    trace_blk_co_pwritev(blk, bs, offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);

    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, true);
    }

    if (!blk->enable_write_cache) {
        flags |= BDRV_REQ_FUA;
    }

    ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
    bdrv_dec_in_flight(bs);
    return ret;
}

typedef struct BlkRwCo {
    BlockBackend *blk;
    int64_t offset;
    QEMUIOVector *qiov;
    int ret;
    BdrvRequestFlags flags;
} BlkRwCo;

static void blk_read_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;

    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
                              rwco->qiov, rwco->flags);
}

static void blk_write_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;

    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
                               rwco->qiov, rwco->flags);
}

static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
                   int64_t bytes, CoroutineEntry co_entry,
                   BdrvRequestFlags flags)
{
    QEMUIOVector qiov;
    struct iovec iov;
    Coroutine *co;
    BlkRwCo rwco;

    iov = (struct iovec) {
        .iov_base = buf,
        .iov_len = bytes,
    };
    qemu_iovec_init_external(&qiov, &iov, 1);

    rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .qiov   = &qiov,
        .flags  = flags,
        .ret    = NOT_DONE,
    };

    co = qemu_coroutine_create(co_entry, &rwco);
    qemu_coroutine_enter(co);
    BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);

    return rwco.ret;
}

int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
                          int count)
{
    int ret;

    ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

    blk_root_drained_begin(blk->root);
    ret = blk_pread(blk, offset, buf, count);
    blk_root_drained_end(blk->root);
    return ret;
}

int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                      int count, BdrvRequestFlags flags)
{
    return blk_prw(blk, offset, NULL, count, blk_write_entry,
                   flags | BDRV_REQ_ZERO_WRITE);
}

int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
    return bdrv_make_zero(blk->root, flags);
}

static void error_callback_bh(void *opaque)
{
    struct BlockBackendAIOCB *acb = opaque;

    bdrv_dec_in_flight(acb->common.bs);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_unref(acb);
}

BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
                                  BlockCompletionFunc *cb,
                                  void *opaque, int ret)
{
    struct BlockBackendAIOCB *acb;

    bdrv_inc_in_flight(blk_bs(blk));
    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
    acb->blk = blk;
    acb->ret = ret;

    aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb);
    return &acb->common;
}

typedef struct BlkAioEmAIOCB {
    BlockAIOCB common;
    BlkRwCo rwco;
    int bytes;
    bool has_returned;
} BlkAioEmAIOCB;

static const AIOCBInfo blk_aio_em_aiocb_info = {
    .aiocb_size         = sizeof(BlkAioEmAIOCB),
};

static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
    if (acb->has_returned) {
        bdrv_dec_in_flight(acb->common.bs);
        acb->common.cb(acb->common.opaque, acb->rwco.ret);
        qemu_aio_unref(acb);
    }
}

static void blk_aio_complete_bh(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;

    assert(acb->has_returned);
    blk_aio_complete(acb);
}

static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
                                QEMUIOVector *qiov, CoroutineEntry co_entry,
                                BdrvRequestFlags flags,
                                BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;

    bdrv_inc_in_flight(blk_bs(blk));
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .qiov   = qiov,
        .flags  = flags,
        .ret    = NOT_DONE,
    };
    acb->bytes = bytes;
    acb->has_returned = false;

    co = qemu_coroutine_create(co_entry, acb);
    qemu_coroutine_enter(co);

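    /* The entry function completes through blk_aio_complete(), which does
     * nothing while has_returned is still false.  So if the coroutine has
     * already finished at this point, completion was skipped; set the flag
     * and schedule a bottom half to deliver the callback now, keeping it
     * asynchronous from the caller's point of view. */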
    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        aio_bh_schedule_oneshot(blk_get_aio_context(blk),
                                blk_aio_complete_bh, acb);
    }

    return &acb->common;
}

static void blk_aio_read_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    assert(rwco->qiov->size == acb->bytes);
    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
                              rwco->qiov, rwco->flags);
    blk_aio_complete(acb);
}

static void blk_aio_write_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
                               rwco->qiov, rwco->flags);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                                  int count, BdrvRequestFlags flags,
                                  BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
                        flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}

int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
{
    int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
    if (ret < 0) {
        return ret;
    }
    return count;
}

int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
               BdrvRequestFlags flags)
{
    int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
                      flags);
    if (ret < 0) {
        return ret;
    }
    return count;
}
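
/*
 * Both synchronous wrappers above return @count on success and a negative
 * errno on failure.  Hedged caller sketch (buffer and offset illustrative):
 *
 *     uint8_t buf[512];
 *     int ret = blk_pread(blk, 0, buf, sizeof(buf));
 *     if (ret < 0) {
 *         return ret;    // e.g. -ENOMEDIUM or -EIO
 *     }
 */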

int64_t blk_getlength(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_getlength(blk_bs(blk));
}

void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
    if (!blk_bs(blk)) {
        *nb_sectors_ptr = 0;
    } else {
        bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
    }
}

int64_t blk_nb_sectors(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_nb_sectors(blk_bs(blk));
}

BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
                           QEMUIOVector *qiov, BdrvRequestFlags flags,
                           BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_read_entry, flags, cb, opaque);
}

BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
                            QEMUIOVector *qiov, BdrvRequestFlags flags,
                            BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_write_entry, flags, cb, opaque);
}
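
/*
 * Hypothetical caller sketch for the asynchronous variants: the request is
 * submitted here and @cb runs later with the operation's return value:
 *
 *     static void my_read_cb(void *opaque, int ret)
 *     {
 *         // ret is 0 on success, a negative errno on failure
 *     }
 *
 *     blk_aio_preadv(blk, offset, &qiov, 0, my_read_cb, my_opaque);
 */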

static void blk_aio_flush_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    rwco->ret = blk_co_flush(rwco->blk);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                          BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
}

static void blk_aio_pdiscard_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
                             int64_t offset, int count,
                             BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
                        cb, opaque);
}

void blk_aio_cancel(BlockAIOCB *acb)
{
    bdrv_aio_cancel(acb);
}

void blk_aio_cancel_async(BlockAIOCB *acb)
{
    bdrv_aio_cancel_async(acb);
}

int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_co_ioctl(blk_bs(blk), req, buf);
}

static void blk_ioctl_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
                             rwco->qiov->iov[0].iov_base);
}

int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
    return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
}

static void blk_aio_ioctl_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
                             rwco->qiov->iov[0].iov_base);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                          BlockCompletionFunc *cb, void *opaque)
{
    QEMUIOVector qiov;
    struct iovec iov;

    iov = (struct iovec) {
        .iov_base = buf,
        .iov_len = 0,
    };
    qemu_iovec_init_external(&qiov, &iov, 1);

    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
}

int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
    int ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

    return bdrv_co_pdiscard(blk_bs(blk), offset, count);
}

int blk_co_flush(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_co_flush(blk_bs(blk));
}

static void blk_flush_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_flush(rwco->blk);
}

int blk_flush(BlockBackend *blk)
{
    return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
}

void blk_drain(BlockBackend *blk)
{
    if (blk_bs(blk)) {
        bdrv_drain(blk_bs(blk));
    }
}

void blk_drain_all(void)
{
    bdrv_drain_all();
}

void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                      BlockdevOnError on_write_error)
{
    blk->on_read_error = on_read_error;
    blk->on_write_error = on_write_error;
}

BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
    return is_read ? blk->on_read_error : blk->on_write_error;
}

BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                      int error)
{
    BlockdevOnError on_err = blk_get_on_error(blk, is_read);

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
        return (error == ENOSPC) ?
               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_STOP:
        return BLOCK_ERROR_ACTION_STOP;
    case BLOCKDEV_ON_ERROR_REPORT:
        return BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_IGNORE:
        return BLOCK_ERROR_ACTION_IGNORE;
    case BLOCKDEV_ON_ERROR_AUTO:
    default:
        abort();
    }
}

static void send_qmp_error_event(BlockBackend *blk,
                                 BlockErrorAction action,
                                 bool is_read, int error)
{
    IoOperationType optype;

    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
    qapi_event_send_block_io_error(blk_name(blk),
                                   bdrv_get_node_name(blk_bs(blk)), optype,
                                   action, blk_iostatus_is_enabled(blk),
                                   error == ENOSPC, strerror(error),
                                   &error_abort);
}

/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 */
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
                      bool is_read, int error)
{
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        blk_iostatus_set_err(blk, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(blk, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(blk, action, is_read, error);
    }
}

int blk_is_read_only(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_is_read_only(bs);
    } else {
        return blk->root_state.read_only;
    }
}

int blk_is_sg(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        return 0;
    }

    return bdrv_is_sg(bs);
}

int blk_enable_write_cache(BlockBackend *blk)
{
    return blk->enable_write_cache;
}

void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
    blk->enable_write_cache = wce;
}

void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        error_setg(errp, "Device '%s' has no medium", blk->name);
        return;
    }

    bdrv_invalidate_cache(bs, errp);
}

bool blk_is_inserted(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    return bs && bdrv_is_inserted(bs);
}

bool blk_is_available(BlockBackend *blk)
{
    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}

void blk_lock_medium(BlockBackend *blk, bool locked)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_lock_medium(bs, locked);
    }
}

void blk_eject(BlockBackend *blk, bool eject_flag)
{
    BlockDriverState *bs = blk_bs(blk);
    char *id;

    /* blk_eject is only called by qdevified devices */
    assert(!blk->legacy_dev);

    if (bs) {
        bdrv_eject(bs, eject_flag);
    }

    /* Whether or not we ejected on the backend,
     * the frontend experienced a tray event. */
    id = blk_get_attached_dev_id(blk);
    qapi_event_send_device_tray_moved(blk_name(blk), id,
                                      eject_flag, &error_abort);
    g_free(id);
}

int blk_get_flags(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_get_flags(bs);
    } else {
        return blk->root_state.open_flags;
    }
}

/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
uint32_t blk_get_max_transfer(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    uint32_t max = 0;

    if (bs) {
        max = bs->bl.max_transfer;
    }
    return MIN_NON_ZERO(max, INT_MAX);
}

int blk_get_max_iov(BlockBackend *blk)
{
    return blk->root->bs->bl.max_iov;
}

void blk_set_guest_block_size(BlockBackend *blk, int align)
{
    blk->guest_block_size = align;
}

void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
    return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}

void *blk_blockalign(BlockBackend *blk, size_t size)
{
    return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}

bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        return false;
    }

    return bdrv_op_is_blocked(bs, op, errp);
}

void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_unblock(bs, op, reason);
    }
}

void blk_op_block_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_block_all(bs, reason);
    }
}

void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_unblock_all(bs, reason);
    }
}

AioContext *blk_get_aio_context(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_get_aio_context(bs);
    } else {
        return qemu_get_aio_context();
    }
}

static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
{
    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
    return blk_get_aio_context(blk_acb->blk);
}

void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        if (blk->public.throttle_state) {
            throttle_timers_detach_aio_context(&blk->public.throttle_timers);
        }
        bdrv_set_aio_context(bs, new_context);
        if (blk->public.throttle_state) {
            throttle_timers_attach_aio_context(&blk->public.throttle_timers,
                                               new_context);
        }
    }
}

void blk_add_aio_context_notifier(BlockBackend *blk,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_add_aio_context_notifier(bs, attached_aio_context,
                                      detach_aio_context, opaque);
    }
}

void blk_remove_aio_context_notifier(BlockBackend *blk,
                                     void (*attached_aio_context)(AioContext *,
                                                                  void *),
                                     void (*detach_aio_context)(void *),
                                     void *opaque)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_remove_aio_context_notifier(bs, attached_aio_context,
                                         detach_aio_context, opaque);
    }
}

void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->remove_bs_notifiers, notify);
}

void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->insert_bs_notifiers, notify);
}

void blk_io_plug(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_io_plug(bs);
    }
}

void blk_io_unplug(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_io_unplug(bs);
    }
}

BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
    return &blk->stats;
}

void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                  BlockCompletionFunc *cb, void *opaque)
{
    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}

int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                                      int count, BdrvRequestFlags flags)
{
    return blk_co_pwritev(blk, offset, count, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
}

int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                          int count)
{
    return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
                   BDRV_REQ_WRITE_COMPRESSED);
}

int blk_truncate(BlockBackend *blk, int64_t offset)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_truncate(blk_bs(blk), offset);
}

static void blk_pdiscard_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
}

int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
    return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
}

int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                     int64_t pos, int size)
{
    int ret;

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
    if (ret < 0) {
        return ret;
    }

    if (ret == size && !blk->enable_write_cache) {
        ret = bdrv_flush(blk_bs(blk));
    }

    return ret < 0 ? ret : size;
}

int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
}

int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_blocksizes(blk_bs(blk), bsz);
}

int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_geometry(blk_bs(blk), geo);
}

/*
 * Updates the BlockBackendRootState object with data from the currently
 * attached BlockDriverState.
 */
void blk_update_root_state(BlockBackend *blk)
{
    assert(blk->root);

    blk->root_state.open_flags    = blk->root->bs->open_flags;
    blk->root_state.read_only     = blk->root->bs->read_only;
    blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
}

/*
 * Returns the detect-zeroes setting to be used for bdrv_open() of a
 * BlockDriverState which is supposed to inherit the root state.
 */
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
    return blk->root_state.detect_zeroes;
}

/*
 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
 * supposed to inherit the root state.
 */
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
    int bs_flags;

    bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
    bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;

    return bs_flags;
}

BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
    return &blk->root_state;
}

int blk_commit_all(void)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *aio_context = blk_get_aio_context(blk);

        aio_context_acquire(aio_context);
        if (blk_is_inserted(blk) && blk->root->bs->backing) {
            int ret = bdrv_commit(blk->root->bs);
            if (ret < 0) {
                aio_context_release(aio_context);
                return ret;
            }
        }
        aio_context_release(aio_context);
    }
    return 0;
}


/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
    throttle_group_config(blk, cfg);
}

void blk_io_limits_disable(BlockBackend *blk)
{
    assert(blk->public.throttle_state);
    bdrv_drained_begin(blk_bs(blk));
    throttle_group_unregister_blk(blk);
    bdrv_drained_end(blk_bs(blk));
}

/* should be called before blk_set_io_limits if a limit is set */
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
    assert(!blk->public.throttle_state);
    throttle_group_register_blk(blk, group);
}

void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
    /* this BB is not part of any group */
    if (!blk->public.throttle_state) {
        return;
    }

    /* this BB is already a part of the group we want */
    if (!g_strcmp0(throttle_group_get_name(blk), group)) {
        return;
    }

    /* need to change the group this BB belongs to */
    blk_io_limits_disable(blk);
    blk_io_limits_enable(blk, group);
}
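
/*
 * Typical throttling setup as a hedged sketch; the group name and the
 * ThrottleConfig values are illustrative:
 *
 *     ThrottleConfig cfg;
 *
 *     throttle_config_init(&cfg);
 *     cfg.buckets[THROTTLE_OPS_TOTAL].avg = 100;
 *
 *     blk_io_limits_enable(blk, "group0");  // must come before the config
 *     blk_set_io_limits(blk, &cfg);
 */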

static void blk_root_drained_begin(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    /* Note that blk->root may not be accessible here yet if we are just
     * attaching to a BlockDriverState that is drained. Use child instead. */

    if (blk->public.io_limits_disabled++ == 0) {
        throttle_group_restart_blk(blk);
    }
}

static void blk_root_drained_end(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    assert(blk->public.io_limits_disabled);
    --blk->public.io_limits_disabled;
}
1786